Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,6 +1242,20 @@ def _init_instance(self, context, instance):
'updated.', instance=instance)
self._set_instance_obj_error_state(instance)
return
except exception.PciDeviceNotFoundById:
# This is bug 1981813 where the bound port vnic_type has changed
# from direct to macvtap. Nova does not support that and it
# already printed an ERROR when the change is detected during
# _heal_instance_info_cache. Now we print an ERROR again and skip
# plugging the vifs but let the service startup continue to init
# the other instances
LOG.exception(
'Virtual interface plugging failed for instance. Probably the '
'vnic_type of the bound port has been changed. Nova does not '
'support such change.',
instance=instance
)
return

if instance.task_state == task_states.RESIZE_MIGRATING:
# We crashed during resize/migration, so roll back for safety
Expand Down
5 changes: 3 additions & 2 deletions nova/conductor/tasks/live_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,9 @@ def _check_compatible_with_source_hypervisor(self, destination):

source_version = source_info.hypervisor_version
destination_version = destination_info.hypervisor_version
if source_version > destination_version:
raise exception.DestinationHypervisorTooOld()
if not CONF.workarounds.skip_hypervisor_version_check_on_lm:
if source_version > destination_version:
raise exception.DestinationHypervisorTooOld()
return source_info, destination_info

def _call_livem_checks_on_host(self, destination, provider_mapping):
Expand Down
9 changes: 9 additions & 0 deletions nova/conf/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,15 @@
* ``[scheduler]query_placement_for_image_type_support`` - enables
filtering computes based on supported image types, which is required
to be enabled for this to take effect.
"""),
cfg.ListOpt('vmdk_allowed_types',
default=['streamOptimized', 'monolithicSparse'],
help="""
A list of strings describing allowed VMDK "create-type" subformats
that will be allowed. This is recommended to only include
single-file-with-sparse-header variants to avoid potential host file
exposure due to processing named extents. If this list is empty, then no
form of VMDK image will be allowed.
"""),
cfg.BoolOpt('packing_host_numa_cells_allocation_strategy',
default=True,
Expand Down
7 changes: 7 additions & 0 deletions nova/conf/workarounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,13 @@
with the destination host. When using QEMU >= 2.9 and libvirt >=
4.4.0, libvirt will do the correct thing with respect to checking CPU
compatibility on the destination host during live migration.
"""),
cfg.BoolOpt(
'skip_hypervisor_version_check_on_lm',
default=False,
help="""
When this is enabled, it will skip version-checking of hypervisors
during live migration.
"""),
]

Expand Down
34 changes: 34 additions & 0 deletions nova/network/neutron.py
Original file line number Diff line number Diff line change
Expand Up @@ -3383,6 +3383,25 @@ def _build_vif_model(self, context, client, current_neutron_port,
delegate_create=True,
)

def _log_error_if_vnic_type_changed(
self, port_id, old_vnic_type, new_vnic_type, instance
):
if old_vnic_type and old_vnic_type != new_vnic_type:
LOG.error(
'The vnic_type of the bound port %s has '
'been changed in neutron from "%s" to '
'"%s". Changing vnic_type of a bound port '
'is not supported by Nova. To avoid '
'breaking the connectivity of the instance '
'please change the port vnic_type back to '
'"%s".',
port_id,
old_vnic_type,
new_vnic_type,
old_vnic_type,
instance=instance
)

def _build_network_info_model(self, context, instance, networks=None,
port_ids=None, admin_client=None,
preexisting_port_ids=None,
Expand Down Expand Up @@ -3456,6 +3475,12 @@ def _build_network_info_model(self, context, instance, networks=None,
preexisting_port_ids)
for index, vif in enumerate(nw_info):
if vif['id'] == refresh_vif_id:
self._log_error_if_vnic_type_changed(
vif['id'],
vif['vnic_type'],
refreshed_vif['vnic_type'],
instance,
)
# Update the existing entry.
nw_info[index] = refreshed_vif
LOG.debug('Updated VIF entry in instance network '
Expand Down Expand Up @@ -3505,13 +3530,22 @@ def _build_network_info_model(self, context, instance, networks=None,
networks, port_ids = self._gather_port_ids_and_networks(
context, instance, networks, port_ids, client)

old_nw_info = instance.get_network_info()
nw_info = network_model.NetworkInfo()
for port_id in port_ids:
current_neutron_port = current_neutron_port_map.get(port_id)
if current_neutron_port:
vif = self._build_vif_model(
context, client, current_neutron_port, networks,
preexisting_port_ids)
for old_vif in old_nw_info:
if old_vif['id'] == port_id:
self._log_error_if_vnic_type_changed(
port_id,
old_vif['vnic_type'],
vif['vnic_type'],
instance,
)
nw_info.append(vif)
elif nw_info_refresh:
LOG.info('Port %s from network info_cache is no '
Expand Down
9 changes: 6 additions & 3 deletions nova/tests/fixtures/libvirt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2225,9 +2225,12 @@ def setUp(self):

# libvirt driver needs to call out to the filesystem to get the
# parent_ifname for the SRIOV VFs.
self.useFixture(fixtures.MockPatch(
'nova.pci.utils.get_ifname_by_pci_address',
return_value='fake_pf_interface_name'))
self.mock_get_ifname_by_pci_address = self.useFixture(
fixtures.MockPatch(
"nova.pci.utils.get_ifname_by_pci_address",
return_value="fake_pf_interface_name",
)
).mock

self.useFixture(fixtures.MockPatch(
'nova.pci.utils.get_mac_by_pci_address',
Expand Down
83 changes: 83 additions & 0 deletions nova/tests/functional/libvirt/test_pci_sriov_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import nova
from nova import context
from nova import exception
from nova.network import constants
from nova import objects
from nova.objects import fields
Expand Down Expand Up @@ -951,6 +952,88 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self):
],
)

def test_change_bound_port_vnic_type_kills_compute_at_restart(self):
"""Create a server with a direct port and change the vnic_type of the
bound port to macvtap. Then restart the compute service.

As the vnic_type is changed on the port but the vif_type is hwveb
instead of macvtap the vif plug logic will try to look up the netdev
of the parent VF. Howvere that VF consumed by the instance so the
netdev does not exists. This causes that the compute service will fail
with an exception during startup
"""
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2)
self.start_compute(pci_info=pci_info)

# create a direct port
port = self.neutron.network_4_port_1
self.neutron.create_port({'port': port})

# create a server using the VF via neutron
server = self._create_server(networks=[{'port': port['id']}])

# update the vnic_type of the port in neutron
port = copy.deepcopy(port)
port['binding:vnic_type'] = 'macvtap'
self.neutron.update_port(port['id'], {"port": port})

compute = self.computes['compute1']

# Force an update on the instance info cache to ensure nova gets the
# information about the updated port
with context.target_cell(
context.get_admin_context(),
self.host_mappings['compute1'].cell_mapping
) as cctxt:
compute.manager._heal_instance_info_cache(cctxt)
self.assertIn(
'The vnic_type of the bound port %s has been changed in '
'neutron from "direct" to "macvtap". Changing vnic_type of a '
'bound port is not supported by Nova. To avoid breaking the '
'connectivity of the instance please change the port '
'vnic_type back to "direct".' % port['id'],
self.stdlog.logger.output,
)

def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
# we want to fail the netdev lookup only if the pci_address is
# already consumed by our instance. So we look into the instance
# definition to see if the device is attached to the instance as VF
conn = compute.manager.driver._host.get_connection()
dom = conn.lookupByUUIDString(server['id'])
dev = dom._def['devices']['nics'][0]
lookup_addr = pci_addr.replace(':', '_').replace('.', '_')
if (
dev['type'] == 'hostdev' and
dev['source'] == 'pci_' + lookup_addr
):
# nova tried to look up the netdev of an already consumed VF.
# So we have to fail
raise exception.PciDeviceNotFoundById(id=pci_addr)

# We need to simulate the actual failure manually as in our functional
# environment all the PCI lookup is mocked. In reality nova tries to
# look up the netdev of the pci device on the host used by the port as
# the parent of the macvtap. However, as the originally direct port is
# bound to the instance, the VF pci device is already consumed by the
# instance and therefore there is no netdev for the VF.
with mock.patch(
'nova.pci.utils.get_ifname_by_pci_address',
side_effect=fake_get_ifname_by_pci_address,
):
# Nova cannot prevent the vnic_type change on a bound port. Neutron
# should prevent that instead. But the nova-compute should still
# be able to start up and only log an ERROR for this instance in
# inconsistent state.
self.restart_compute_service('compute1')

self.assertIn(
'Virtual interface plugging failed for instance. Probably the '
'vnic_type of the bound port has been changed. Nova does not '
'support such change.',
self.stdlog.logger.output,
)


class SRIOVAttachDetachTest(_PCIServersTestBase):
# no need for aliases as these test will request SRIOV via neutron
Expand Down
30 changes: 30 additions & 0 deletions nova/tests/unit/compute/test_compute_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self):
self.compute._init_instance(self.context, instance)
set_error_state.assert_called_once_with(instance)

def test_init_instance_vif_plug_fails_missing_pci(self):
instance = fake_instance.fake_instance_obj(
self.context,
uuid=uuids.instance,
info_cache=None,
power_state=power_state.RUNNING,
vm_state=vm_states.ACTIVE,
task_state=None,
host=self.compute.host,
expected_attrs=['info_cache'])

with test.nested(
mock.patch.object(context, 'get_admin_context',
return_value=self.context),
mock.patch.object(objects.Instance, 'get_network_info',
return_value=network_model.NetworkInfo()),
mock.patch.object(self.compute.driver, 'plug_vifs',
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
mock.patch("nova.compute.manager.LOG.exception"),
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
# as this does not raise, we are sure that the compute service
# continues initializing the rest of the instances
self.compute._init_instance(self.context, instance)
log_exception.assert_called_once_with(
"Virtual interface plugging failed for instance. Probably the "
"vnic_type of the bound port has been changed. Nova does not "
"support such change.",
instance=instance
)

def _test__validate_pinning_configuration(self, supports_pcpus=True):
instance_1 = fake_instance.fake_instance_obj(
self.context, uuid=uuids.instance_1)
Expand Down
30 changes: 30 additions & 0 deletions nova/tests/unit/conductor/tasks/test_live_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,36 @@ def test_check_compatible_fails_with_hypervisor_too_old(
mock.call(self.destination)],
mock_get_info.call_args_list)

@mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info')
def test_skip_hypervisor_version_check_on_lm_raise_ex(self, mock_get_info):
host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7}
host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6}
self.flags(group='workarounds',
skip_hypervisor_version_check_on_lm=False)
mock_get_info.side_effect = [objects.ComputeNode(**host1),
objects.ComputeNode(**host2)]
self.assertRaises(exception.DestinationHypervisorTooOld,
self.task._check_compatible_with_source_hypervisor,
self.destination)
self.assertEqual([mock.call(self.instance_host),
mock.call(self.destination)],
mock_get_info.call_args_list)

@mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info')
def test_skip_hypervisor_version_check_on_lm_do_not_raise_ex(
self, mock_get_info
):
host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7}
host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6}
self.flags(group='workarounds',
skip_hypervisor_version_check_on_lm=True)
mock_get_info.side_effect = [objects.ComputeNode(**host1),
objects.ComputeNode(**host2)]
self.task._check_compatible_with_source_hypervisor(self.destination)
self.assertEqual([mock.call(self.instance_host),
mock.call(self.destination)],
mock_get_info.call_args_list)

@mock.patch.object(compute_rpcapi.ComputeAPI,
'check_can_live_migrate_destination')
def test_check_requested_destination(self, mock_check):
Expand Down
Loading