Skip to content

Commit

Permalink
Gracefully ERROR in _init_instance if vnic_type changed
Browse files Browse the repository at this point in the history
If the vnic_type of a bound port changes from "direct" to "macvtap" and
then the compute service is restarted then during _init_instance nova
tries to plug the vif of the changed port. However as it now has macvtap
vnic_type nova tries to look up the netdev of the parent VF. Still that
VF is consumed by the instance so there is no such netdev on the host
OS. This error killed the compute service at startup due to unhandled
exception. This patch adds the exception handler, logs an ERROR and
continue initializing other instances on the host.

Also this patch adds a detailed ERROR log when nova detects that the
vnic_type changed during _heal_instance_info_cache periodic.

Closes-Bug: #1981813
Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85
(cherry picked from commit e43bf90)
  • Loading branch information
gibizer committed Jan 11, 2023
1 parent 4954f99 commit a28c827
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 7 deletions.
14 changes: 14 additions & 0 deletions nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,6 +1242,20 @@ def _init_instance(self, context, instance):
'updated.', instance=instance)
self._set_instance_obj_error_state(instance)
return
except exception.PciDeviceNotFoundById:
# This is bug 1981813 where the bound port vnic_type has changed
# from direct to macvtap. Nova does not support that and it
# already printed an ERROR when the change is detected during
# _heal_instance_info_cache. Now we print an ERROR again and skip
# plugging the vifs but let the service startup continue to init
# the other instances
LOG.exception(
'Virtual interface plugging failed for instance. Probably the '
'vnic_type of the bound port has been changed. Nova does not '
'support such change.',
instance=instance
)
return

if instance.task_state == task_states.RESIZE_MIGRATING:
# We crashed during resize/migration, so roll back for safety
Expand Down
34 changes: 34 additions & 0 deletions nova/network/neutron.py
Original file line number Diff line number Diff line change
Expand Up @@ -3383,6 +3383,25 @@ def _build_vif_model(self, context, client, current_neutron_port,
delegate_create=True,
)

def _log_error_if_vnic_type_changed(
self, port_id, old_vnic_type, new_vnic_type, instance
):
if old_vnic_type and old_vnic_type != new_vnic_type:
LOG.error(
'The vnic_type of the bound port %s has '
'been changed in neutron from "%s" to '
'"%s". Changing vnic_type of a bound port '
'is not supported by Nova. To avoid '
'breaking the connectivity of the instance '
'please change the port vnic_type back to '
'"%s".',
port_id,
old_vnic_type,
new_vnic_type,
old_vnic_type,
instance=instance
)

def _build_network_info_model(self, context, instance, networks=None,
port_ids=None, admin_client=None,
preexisting_port_ids=None,
Expand Down Expand Up @@ -3456,6 +3475,12 @@ def _build_network_info_model(self, context, instance, networks=None,
preexisting_port_ids)
for index, vif in enumerate(nw_info):
if vif['id'] == refresh_vif_id:
self._log_error_if_vnic_type_changed(
vif['id'],
vif['vnic_type'],
refreshed_vif['vnic_type'],
instance,
)
# Update the existing entry.
nw_info[index] = refreshed_vif
LOG.debug('Updated VIF entry in instance network '
Expand Down Expand Up @@ -3505,13 +3530,22 @@ def _build_network_info_model(self, context, instance, networks=None,
networks, port_ids = self._gather_port_ids_and_networks(
context, instance, networks, port_ids, client)

old_nw_info = instance.get_network_info()
nw_info = network_model.NetworkInfo()
for port_id in port_ids:
current_neutron_port = current_neutron_port_map.get(port_id)
if current_neutron_port:
vif = self._build_vif_model(
context, client, current_neutron_port, networks,
preexisting_port_ids)
for old_vif in old_nw_info:
if old_vif['id'] == port_id:
self._log_error_if_vnic_type_changed(
port_id,
old_vif['vnic_type'],
vif['vnic_type'],
instance,
)
nw_info.append(vif)
elif nw_info_refresh:
LOG.info('Port %s from network info_cache is no '
Expand Down
23 changes: 16 additions & 7 deletions nova/tests/functional/libvirt/test_pci_sriov_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,14 @@ def test_change_bound_port_vnic_type_kills_compute_at_restart(self):
self.host_mappings['compute1'].cell_mapping
) as cctxt:
compute.manager._heal_instance_info_cache(cctxt)
self.assertIn(
'The vnic_type of the bound port %s has been changed in '
'neutron from "direct" to "macvtap". Changing vnic_type of a '
'bound port is not supported by Nova. To avoid breaking the '
'connectivity of the instance please change the port '
'vnic_type back to "direct".' % port['id'],
self.stdlog.logger.output,
)

def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
# we want to fail the netdev lookup only if the pci_address is
Expand Down Expand Up @@ -1013,17 +1021,18 @@ def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
'nova.pci.utils.get_ifname_by_pci_address',
side_effect=fake_get_ifname_by_pci_address,
):
# This is bug 1981813 as the compute service fails to start with an
# exception.
# Nova cannot prevent the vnic_type change on a bound port. Neutron
# should prevent that instead. But the nova-compute should still
# be able to start up and only log an ERROR for this instance in
# inconsistent state.
self.assertRaises(
exception.PciDeviceNotFoundById,
self.restart_compute_service,
'compute1',
)
self.restart_compute_service('compute1')

self.assertIn(
'Virtual interface plugging failed for instance. Probably the '
'vnic_type of the bound port has been changed. Nova does not '
'support such change.',
self.stdlog.logger.output,
)


class SRIOVAttachDetachTest(_PCIServersTestBase):
Expand Down
30 changes: 30 additions & 0 deletions nova/tests/unit/compute/test_compute_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self):
self.compute._init_instance(self.context, instance)
set_error_state.assert_called_once_with(instance)

def test_init_instance_vif_plug_fails_missing_pci(self):
instance = fake_instance.fake_instance_obj(
self.context,
uuid=uuids.instance,
info_cache=None,
power_state=power_state.RUNNING,
vm_state=vm_states.ACTIVE,
task_state=None,
host=self.compute.host,
expected_attrs=['info_cache'])

with test.nested(
mock.patch.object(context, 'get_admin_context',
return_value=self.context),
mock.patch.object(objects.Instance, 'get_network_info',
return_value=network_model.NetworkInfo()),
mock.patch.object(self.compute.driver, 'plug_vifs',
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
mock.patch("nova.compute.manager.LOG.exception"),
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
# as this does not raise, we are sure that the compute service
# continues initializing the rest of the instances
self.compute._init_instance(self.context, instance)
log_exception.assert_called_once_with(
"Virtual interface plugging failed for instance. Probably the "
"vnic_type of the bound port has been changed. Nova does not "
"support such change.",
instance=instance
)

def _test__validate_pinning_configuration(self, supports_pcpus=True):
instance_1 = fake_instance.fake_instance_obj(
self.context, uuid=uuids.instance_1)
Expand Down
149 changes: 149 additions & 0 deletions nova/tests/unit/network/test_neutron.py
Original file line number Diff line number Diff line change
Expand Up @@ -3383,6 +3383,155 @@ def test_build_network_info_model_empty(
mocked_client.list_ports.assert_called_once_with(
tenant_id=uuids.fake, device_id=uuids.instance)

@mock.patch.object(
neutronapi.API,
'_get_physnet_tunneled_info',
new=mock.Mock(return_value=(None, False)))
@mock.patch.object(
neutronapi.API,
'_get_preexisting_port_ids',
new=mock.Mock(return_value=[]))
@mock.patch.object(
neutronapi.API,
'_get_subnets_from_port',
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
@mock.patch.object(
neutronapi.API,
'_get_floating_ips_by_fixed_and_port',
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
@mock.patch.object(neutronapi, 'get_client')
def test_build_network_info_model_full_vnic_type_change(
self, mock_get_client
):
mocked_client = mock.create_autospec(client.Client)
mock_get_client.return_value = mocked_client
fake_inst = objects.Instance()
fake_inst.project_id = uuids.fake
fake_inst.uuid = uuids.instance
fake_ports = [
{
"id": "port1",
"network_id": "net-id",
"tenant_id": uuids.fake,
"admin_state_up": True,
"status": "ACTIVE",
"fixed_ips": [{"ip_address": "1.1.1.1"}],
"mac_address": "de:ad:be:ef:00:01",
"binding:vif_type": model.VIF_TYPE_BRIDGE,
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
"binding:vif_details": {},
},
]
mocked_client.list_ports.return_value = {'ports': fake_ports}
fake_inst.info_cache = objects.InstanceInfoCache.new(
self.context, uuids.instance)
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])

# build the network info first
nw_infos = self.api._build_network_info_model(
self.context,
fake_inst,
force_refresh=True,
)

self.assertEqual(1, len(nw_infos))
fake_inst.info_cache.network_info = nw_infos

# change the vnic_type of the port and rebuild the network info
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
with mock.patch(
"nova.network.neutron.API._log_error_if_vnic_type_changed"
) as mock_log:
nw_infos = self.api._build_network_info_model(
self.context,
fake_inst,
force_refresh=True,
)

mock_log.assert_called_once_with(
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
self.assertEqual(1, len(nw_infos))

@mock.patch.object(
neutronapi.API,
'_get_physnet_tunneled_info',
new=mock.Mock(return_value=(None, False)))
@mock.patch.object(
neutronapi.API,
'_get_preexisting_port_ids',
new=mock.Mock(return_value=[]))
@mock.patch.object(
neutronapi.API,
'_get_subnets_from_port',
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
@mock.patch.object(
neutronapi.API,
'_get_floating_ips_by_fixed_and_port',
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
@mock.patch.object(neutronapi, 'get_client')
def test_build_network_info_model_single_vnic_type_change(
self, mock_get_client
):
mocked_client = mock.create_autospec(client.Client)
mock_get_client.return_value = mocked_client
fake_inst = objects.Instance()
fake_inst.project_id = uuids.fake
fake_inst.uuid = uuids.instance
fake_ports = [
{
"id": "port1",
"network_id": "net-id",
"tenant_id": uuids.fake,
"admin_state_up": True,
"status": "ACTIVE",
"fixed_ips": [{"ip_address": "1.1.1.1"}],
"mac_address": "de:ad:be:ef:00:01",
"binding:vif_type": model.VIF_TYPE_BRIDGE,
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
"binding:vif_details": {},
},
]
fake_nets = [
{
"id": "net-id",
"name": "foo",
"tenant_id": uuids.fake,
}
]
mocked_client.list_ports.return_value = {'ports': fake_ports}
fake_inst.info_cache = objects.InstanceInfoCache.new(
self.context, uuids.instance)
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])

# build the network info first
nw_infos = self.api._build_network_info_model(
self.context,
fake_inst,
fake_nets,
[fake_ports[0]["id"]],
refresh_vif_id=fake_ports[0]["id"],
)

self.assertEqual(1, len(nw_infos))
fake_inst.info_cache.network_info = nw_infos

# change the vnic_type of the port and rebuild the network info
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
with mock.patch(
"nova.network.neutron.API._log_error_if_vnic_type_changed"
) as mock_log:
nw_infos = self.api._build_network_info_model(
self.context,
fake_inst,
fake_nets,
[fake_ports[0]["id"]],
refresh_vif_id=fake_ports[0]["id"],
)

mock_log.assert_called_once_with(
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
self.assertEqual(1, len(nw_infos))

@mock.patch.object(neutronapi, 'get_client')
def test_get_subnets_from_port(self, mock_get_client):
mocked_client = mock.create_autospec(client.Client)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
fixes:
- |
`Bug #1981813 <https://bugs.launchpad.net/nova/+bug/1981813>`_: Now nova
detects if the ``vnic_type`` of a bound port has been changed in neutron
and leaves an ERROR message in the compute service log as such change on a
bound port is not supported. Also the restart of the nova-compute service
will not crash any more after such port change. Nova will log an ERROR and
skip the initialization of the instance with such port during the startup.

0 comments on commit a28c827

Please sign in to comment.