From 4954f993680c75fd9d3d507f2dcd00300c9b3d44 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 15 Jul 2022 12:43:58 +0200 Subject: [PATCH 1/4] Reproduce bug 1981813 in func env There stable/yoga only change in test_pci_sriov_servers.py due to unittest.mock switch[1] only happened in zed. [1] https://review.opendev.org/q/topic:unittest.mock+status:merged+project:openstack/nova Related-Bug: #1981813 Change-Id: I9367b7ed475917bdb05eb3f209ce1a4e646534e2 (cherry picked from commit f8c91eb75fc5504a37fc3b4be1d65d33dbc9b511) --- nova/tests/fixtures/libvirt.py | 9 ++- .../libvirt/test_pci_sriov_servers.py | 74 +++++++++++++++++++ 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/nova/tests/fixtures/libvirt.py b/nova/tests/fixtures/libvirt.py index 0684bae7ddd..5ccf01e40f9 100644 --- a/nova/tests/fixtures/libvirt.py +++ b/nova/tests/fixtures/libvirt.py @@ -2225,9 +2225,12 @@ def setUp(self): # libvirt driver needs to call out to the filesystem to get the # parent_ifname for the SRIOV VFs. - self.useFixture(fixtures.MockPatch( - 'nova.pci.utils.get_ifname_by_pci_address', - return_value='fake_pf_interface_name')) + self.mock_get_ifname_by_pci_address = self.useFixture( + fixtures.MockPatch( + "nova.pci.utils.get_ifname_by_pci_address", + return_value="fake_pf_interface_name", + ) + ).mock self.useFixture(fixtures.MockPatch( 'nova.pci.utils.get_mac_by_pci_address', diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index c228fb04cf8..c1618751a9e 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -28,6 +28,7 @@ import nova from nova import context +from nova import exception from nova.network import constants from nova import objects from nova.objects import fields @@ -951,6 +952,79 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self): ], ) + def test_change_bound_port_vnic_type_kills_compute_at_restart(self): + """Create a server with a direct port and change the vnic_type of the + bound port to macvtap. Then restart the compute service. + + As the vnic_type is changed on the port but the vif_type is hwveb + instead of macvtap the vif plug logic will try to look up the netdev + of the parent VF. Howvere that VF consumed by the instance so the + netdev does not exists. This causes that the compute service will fail + with an exception during startup + """ + pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2) + self.start_compute(pci_info=pci_info) + + # create a direct port + port = self.neutron.network_4_port_1 + self.neutron.create_port({'port': port}) + + # create a server using the VF via neutron + server = self._create_server(networks=[{'port': port['id']}]) + + # update the vnic_type of the port in neutron + port = copy.deepcopy(port) + port['binding:vnic_type'] = 'macvtap' + self.neutron.update_port(port['id'], {"port": port}) + + compute = self.computes['compute1'] + + # Force an update on the instance info cache to ensure nova gets the + # information about the updated port + with context.target_cell( + context.get_admin_context(), + self.host_mappings['compute1'].cell_mapping + ) as cctxt: + compute.manager._heal_instance_info_cache(cctxt) + + def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False): + # we want to fail the netdev lookup only if the pci_address is + # already consumed by our instance. So we look into the instance + # definition to see if the device is attached to the instance as VF + conn = compute.manager.driver._host.get_connection() + dom = conn.lookupByUUIDString(server['id']) + dev = dom._def['devices']['nics'][0] + lookup_addr = pci_addr.replace(':', '_').replace('.', '_') + if ( + dev['type'] == 'hostdev' and + dev['source'] == 'pci_' + lookup_addr + ): + # nova tried to look up the netdev of an already consumed VF. + # So we have to fail + raise exception.PciDeviceNotFoundById(id=pci_addr) + + # We need to simulate the actual failure manually as in our functional + # environment all the PCI lookup is mocked. In reality nova tries to + # look up the netdev of the pci device on the host used by the port as + # the parent of the macvtap. However, as the originally direct port is + # bound to the instance, the VF pci device is already consumed by the + # instance and therefore there is no netdev for the VF. + with mock.patch( + 'nova.pci.utils.get_ifname_by_pci_address', + side_effect=fake_get_ifname_by_pci_address, + ): + # This is bug 1981813 as the compute service fails to start with an + # exception. + # Nova cannot prevent the vnic_type change on a bound port. Neutron + # should prevent that instead. But the nova-compute should still + # be able to start up and only log an ERROR for this instance in + # inconsistent state. + self.assertRaises( + exception.PciDeviceNotFoundById, + self.restart_compute_service, + 'compute1', + ) + class SRIOVAttachDetachTest(_PCIServersTestBase): # no need for aliases as these test will request SRIOV via neutron From a28c82719545d5c8ee7f3ff1361b3a796e05095a Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 15 Jul 2022 13:48:46 +0200 Subject: [PATCH 2/4] Gracefully ERROR in _init_instance if vnic_type changed If the vnic_type of a bound port changes from "direct" to "macvtap" and then the compute service is restarted then during _init_instance nova tries to plug the vif of the changed port. However as it now has macvtap vnic_type nova tries to look up the netdev of the parent VF. Still that VF is consumed by the instance so there is no such netdev on the host OS. This error killed the compute service at startup due to unhandled exception. This patch adds the exception handler, logs an ERROR and continue initializing other instances on the host. Also this patch adds a detailed ERROR log when nova detects that the vnic_type changed during _heal_instance_info_cache periodic. Closes-Bug: #1981813 Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85 (cherry picked from commit e43bf900dc8ca66578603bed333c56b215b1876e) --- nova/compute/manager.py | 14 ++ nova/network/neutron.py | 34 ++++ .../libvirt/test_pci_sriov_servers.py | 23 ++- nova/tests/unit/compute/test_compute_mgr.py | 30 ++++ nova/tests/unit/network/test_neutron.py | 149 ++++++++++++++++++ ...813-vnic-type-change-9f3e16fae885b57f.yaml | 9 ++ 6 files changed, 252 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 0762098328a..435578d40c3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1242,6 +1242,20 @@ def _init_instance(self, context, instance): 'updated.', instance=instance) self._set_instance_obj_error_state(instance) return + except exception.PciDeviceNotFoundById: + # This is bug 1981813 where the bound port vnic_type has changed + # from direct to macvtap. Nova does not support that and it + # already printed an ERROR when the change is detected during + # _heal_instance_info_cache. Now we print an ERROR again and skip + # plugging the vifs but let the service startup continue to init + # the other instances + LOG.exception( + 'Virtual interface plugging failed for instance. Probably the ' + 'vnic_type of the bound port has been changed. Nova does not ' + 'support such change.', + instance=instance + ) + return if instance.task_state == task_states.RESIZE_MIGRATING: # We crashed during resize/migration, so roll back for safety diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 3c9da4e9370..1e703658f87 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -3383,6 +3383,25 @@ def _build_vif_model(self, context, client, current_neutron_port, delegate_create=True, ) + def _log_error_if_vnic_type_changed( + self, port_id, old_vnic_type, new_vnic_type, instance + ): + if old_vnic_type and old_vnic_type != new_vnic_type: + LOG.error( + 'The vnic_type of the bound port %s has ' + 'been changed in neutron from "%s" to ' + '"%s". Changing vnic_type of a bound port ' + 'is not supported by Nova. To avoid ' + 'breaking the connectivity of the instance ' + 'please change the port vnic_type back to ' + '"%s".', + port_id, + old_vnic_type, + new_vnic_type, + old_vnic_type, + instance=instance + ) + def _build_network_info_model(self, context, instance, networks=None, port_ids=None, admin_client=None, preexisting_port_ids=None, @@ -3456,6 +3475,12 @@ def _build_network_info_model(self, context, instance, networks=None, preexisting_port_ids) for index, vif in enumerate(nw_info): if vif['id'] == refresh_vif_id: + self._log_error_if_vnic_type_changed( + vif['id'], + vif['vnic_type'], + refreshed_vif['vnic_type'], + instance, + ) # Update the existing entry. nw_info[index] = refreshed_vif LOG.debug('Updated VIF entry in instance network ' @@ -3505,6 +3530,7 @@ def _build_network_info_model(self, context, instance, networks=None, networks, port_ids = self._gather_port_ids_and_networks( context, instance, networks, port_ids, client) + old_nw_info = instance.get_network_info() nw_info = network_model.NetworkInfo() for port_id in port_ids: current_neutron_port = current_neutron_port_map.get(port_id) @@ -3512,6 +3538,14 @@ def _build_network_info_model(self, context, instance, networks=None, vif = self._build_vif_model( context, client, current_neutron_port, networks, preexisting_port_ids) + for old_vif in old_nw_info: + if old_vif['id'] == port_id: + self._log_error_if_vnic_type_changed( + port_id, + old_vif['vnic_type'], + vif['vnic_type'], + instance, + ) nw_info.append(vif) elif nw_info_refresh: LOG.info('Port %s from network info_cache is no ' diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index c1618751a9e..c8add15480f 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -986,6 +986,14 @@ def test_change_bound_port_vnic_type_kills_compute_at_restart(self): self.host_mappings['compute1'].cell_mapping ) as cctxt: compute.manager._heal_instance_info_cache(cctxt) + self.assertIn( + 'The vnic_type of the bound port %s has been changed in ' + 'neutron from "direct" to "macvtap". Changing vnic_type of a ' + 'bound port is not supported by Nova. To avoid breaking the ' + 'connectivity of the instance please change the port ' + 'vnic_type back to "direct".' % port['id'], + self.stdlog.logger.output, + ) def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False): # we want to fail the netdev lookup only if the pci_address is @@ -1013,17 +1021,18 @@ def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False): 'nova.pci.utils.get_ifname_by_pci_address', side_effect=fake_get_ifname_by_pci_address, ): - # This is bug 1981813 as the compute service fails to start with an - # exception. # Nova cannot prevent the vnic_type change on a bound port. Neutron # should prevent that instead. But the nova-compute should still # be able to start up and only log an ERROR for this instance in # inconsistent state. - self.assertRaises( - exception.PciDeviceNotFoundById, - self.restart_compute_service, - 'compute1', - ) + self.restart_compute_service('compute1') + + self.assertIn( + 'Virtual interface plugging failed for instance. Probably the ' + 'vnic_type of the bound port has been changed. Nova does not ' + 'support such change.', + self.stdlog.logger.output, + ) class SRIOVAttachDetachTest(_PCIServersTestBase): diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 147e4480199..1b0469b8e65 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self): self.compute._init_instance(self.context, instance) set_error_state.assert_called_once_with(instance) + def test_init_instance_vif_plug_fails_missing_pci(self): + instance = fake_instance.fake_instance_obj( + self.context, + uuid=uuids.instance, + info_cache=None, + power_state=power_state.RUNNING, + vm_state=vm_states.ACTIVE, + task_state=None, + host=self.compute.host, + expected_attrs=['info_cache']) + + with test.nested( + mock.patch.object(context, 'get_admin_context', + return_value=self.context), + mock.patch.object(objects.Instance, 'get_network_info', + return_value=network_model.NetworkInfo()), + mock.patch.object(self.compute.driver, 'plug_vifs', + side_effect=exception.PciDeviceNotFoundById("pci-addr")), + mock.patch("nova.compute.manager.LOG.exception"), + ) as (get_admin_context, get_nw_info, plug_vifs, log_exception): + # as this does not raise, we are sure that the compute service + # continues initializing the rest of the instances + self.compute._init_instance(self.context, instance) + log_exception.assert_called_once_with( + "Virtual interface plugging failed for instance. Probably the " + "vnic_type of the bound port has been changed. Nova does not " + "support such change.", + instance=instance + ) + def _test__validate_pinning_configuration(self, supports_pcpus=True): instance_1 = fake_instance.fake_instance_obj( self.context, uuid=uuids.instance_1) diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 03e65bb6081..5cde8d482d3 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -3383,6 +3383,155 @@ def test_build_network_info_model_empty( mocked_client.list_ports.assert_called_once_with( tenant_id=uuids.fake, device_id=uuids.instance) + @mock.patch.object( + neutronapi.API, + '_get_physnet_tunneled_info', + new=mock.Mock(return_value=(None, False))) + @mock.patch.object( + neutronapi.API, + '_get_preexisting_port_ids', + new=mock.Mock(return_value=[])) + @mock.patch.object( + neutronapi.API, + '_get_subnets_from_port', + new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')])) + @mock.patch.object( + neutronapi.API, + '_get_floating_ips_by_fixed_and_port', + new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}])) + @mock.patch.object(neutronapi, 'get_client') + def test_build_network_info_model_full_vnic_type_change( + self, mock_get_client + ): + mocked_client = mock.create_autospec(client.Client) + mock_get_client.return_value = mocked_client + fake_inst = objects.Instance() + fake_inst.project_id = uuids.fake + fake_inst.uuid = uuids.instance + fake_ports = [ + { + "id": "port1", + "network_id": "net-id", + "tenant_id": uuids.fake, + "admin_state_up": True, + "status": "ACTIVE", + "fixed_ips": [{"ip_address": "1.1.1.1"}], + "mac_address": "de:ad:be:ef:00:01", + "binding:vif_type": model.VIF_TYPE_BRIDGE, + "binding:vnic_type": model.VNIC_TYPE_DIRECT, + "binding:vif_details": {}, + }, + ] + mocked_client.list_ports.return_value = {'ports': fake_ports} + fake_inst.info_cache = objects.InstanceInfoCache.new( + self.context, uuids.instance) + fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([]) + + # build the network info first + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + force_refresh=True, + ) + + self.assertEqual(1, len(nw_infos)) + fake_inst.info_cache.network_info = nw_infos + + # change the vnic_type of the port and rebuild the network info + fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP + with mock.patch( + "nova.network.neutron.API._log_error_if_vnic_type_changed" + ) as mock_log: + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + force_refresh=True, + ) + + mock_log.assert_called_once_with( + fake_ports[0]["id"], "direct", "macvtap", fake_inst) + self.assertEqual(1, len(nw_infos)) + + @mock.patch.object( + neutronapi.API, + '_get_physnet_tunneled_info', + new=mock.Mock(return_value=(None, False))) + @mock.patch.object( + neutronapi.API, + '_get_preexisting_port_ids', + new=mock.Mock(return_value=[])) + @mock.patch.object( + neutronapi.API, + '_get_subnets_from_port', + new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')])) + @mock.patch.object( + neutronapi.API, + '_get_floating_ips_by_fixed_and_port', + new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}])) + @mock.patch.object(neutronapi, 'get_client') + def test_build_network_info_model_single_vnic_type_change( + self, mock_get_client + ): + mocked_client = mock.create_autospec(client.Client) + mock_get_client.return_value = mocked_client + fake_inst = objects.Instance() + fake_inst.project_id = uuids.fake + fake_inst.uuid = uuids.instance + fake_ports = [ + { + "id": "port1", + "network_id": "net-id", + "tenant_id": uuids.fake, + "admin_state_up": True, + "status": "ACTIVE", + "fixed_ips": [{"ip_address": "1.1.1.1"}], + "mac_address": "de:ad:be:ef:00:01", + "binding:vif_type": model.VIF_TYPE_BRIDGE, + "binding:vnic_type": model.VNIC_TYPE_DIRECT, + "binding:vif_details": {}, + }, + ] + fake_nets = [ + { + "id": "net-id", + "name": "foo", + "tenant_id": uuids.fake, + } + ] + mocked_client.list_ports.return_value = {'ports': fake_ports} + fake_inst.info_cache = objects.InstanceInfoCache.new( + self.context, uuids.instance) + fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([]) + + # build the network info first + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + fake_nets, + [fake_ports[0]["id"]], + refresh_vif_id=fake_ports[0]["id"], + ) + + self.assertEqual(1, len(nw_infos)) + fake_inst.info_cache.network_info = nw_infos + + # change the vnic_type of the port and rebuild the network info + fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP + with mock.patch( + "nova.network.neutron.API._log_error_if_vnic_type_changed" + ) as mock_log: + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + fake_nets, + [fake_ports[0]["id"]], + refresh_vif_id=fake_ports[0]["id"], + ) + + mock_log.assert_called_once_with( + fake_ports[0]["id"], "direct", "macvtap", fake_inst) + self.assertEqual(1, len(nw_infos)) + @mock.patch.object(neutronapi, 'get_client') def test_get_subnets_from_port(self, mock_get_client): mocked_client = mock.create_autospec(client.Client) diff --git a/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml new file mode 100644 index 00000000000..a5a3b7c8c2c --- /dev/null +++ b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + `Bug #1981813 `_: Now nova + detects if the ``vnic_type`` of a bound port has been changed in neutron + and leaves an ERROR message in the compute service log as such change on a + bound port is not supported. Also the restart of the nova-compute service + will not crash any more after such port change. Nova will log an ERROR and + skip the initialization of the instance with such port during the startup. From 516f0de1f6a54cd24d8ebc906c1e3fd3bab0d32e Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 10 Nov 2022 09:55:48 -0800 Subject: [PATCH 3/4] [stable-only][cve] Check VMDK create-type against an allowed list NOTE(sbauza): Stable policy allows us to proactively merge a backport without waiting for the parent patch to be merged (exception to rule #4 in [1]. Marking [stable-only] in order to silence nova-tox-validate-backport [1] https://docs.openstack.org/project-team-guide/stable-branches.html#appropriate-fixes Related-Bug: #1996188 Change-Id: I5a399f1d3d702bfb76c067893e9c924904c8c360 --- nova/conf/compute.py | 9 ++++++ nova/tests/unit/virt/test_images.py | 46 +++++++++++++++++++++++++++++ nova/virt/images.py | 31 +++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/nova/conf/compute.py b/nova/conf/compute.py index 5abe7694f80..352080011ad 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -1007,6 +1007,15 @@ * ``[scheduler]query_placement_for_image_type_support`` - enables filtering computes based on supported image types, which is required to be enabled for this to take effect. +"""), + cfg.ListOpt('vmdk_allowed_types', + default=['streamOptimized', 'monolithicSparse'], + help=""" +A list of strings describing allowed VMDK "create-type" subformats +that will be allowed. This is recommended to only include +single-file-with-sparse-header variants to avoid potential host file +exposure due to processing named extents. If this list is empty, then no +form of VMDK image will be allowed. """), cfg.BoolOpt('packing_host_numa_cells_allocation_strategy', default=True, diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py index 085b169db3c..563330b5414 100644 --- a/nova/tests/unit/virt/test_images.py +++ b/nova/tests/unit/virt/test_images.py @@ -16,6 +16,8 @@ import mock from oslo_concurrency import processutils +from oslo_serialization import jsonutils +from oslo_utils import imageutils from nova.compute import utils as compute_utils from nova import exception @@ -135,3 +137,47 @@ def test_convert_image_without_direct_io_support(self, mock_execute, '-O', 'out_format', '-f', 'in_format', 'source', 'dest') mock_disk_op_sema.__enter__.assert_called_once() self.assertTupleEqual(expected, mock_execute.call_args[0]) + + def test_convert_image_vmdk_allowed_list_checking(self): + info = {'format': 'vmdk', + 'format-specific': { + 'type': 'vmdk', + 'data': { + 'create-type': 'monolithicFlat', + }}} + + # If the format is not in the allowed list, we should get an error + self.assertRaises(exception.ImageUnacceptable, + images.check_vmdk_image, 'foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + # With the format in the allowed list, no error + self.flags(vmdk_allowed_types=['streamOptimized', 'monolithicFlat', + 'monolithicSparse'], + group='compute') + images.check_vmdk_image('foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + # With an empty list, allow nothing + self.flags(vmdk_allowed_types=[], group='compute') + self.assertRaises(exception.ImageUnacceptable, + images.check_vmdk_image, 'foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + @mock.patch.object(images, 'fetch') + @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info') + def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch): + info = {'format': 'vmdk', + 'format-specific': { + 'type': 'vmdk', + 'data': { + 'create-type': 'monolithicFlat', + }}} + mock_info.return_value = jsonutils.dumps(info) + with mock.patch('os.path.exists', return_value=True): + e = self.assertRaises(exception.ImageUnacceptable, + images.fetch_to_raw, None, 'foo', 'anypath') + self.assertIn('Invalid VMDK create-type specified', str(e)) diff --git a/nova/virt/images.py b/nova/virt/images.py index 5358f3766ac..f13c8722909 100644 --- a/nova/virt/images.py +++ b/nova/virt/images.py @@ -110,6 +110,34 @@ def get_info(context, image_href): return IMAGE_API.get(context, image_href) +def check_vmdk_image(image_id, data): + # Check some rules about VMDK files. Specifically we want to make + # sure that the "create-type" of the image is one that we allow. + # Some types of VMDK files can reference files outside the disk + # image and we do not want to allow those for obvious reasons. + + types = CONF.compute.vmdk_allowed_types + + if not len(types): + LOG.warning('Refusing to allow VMDK image as vmdk_allowed_' + 'types is empty') + msg = _('Invalid VMDK create-type specified') + raise exception.ImageUnacceptable(image_id=image_id, reason=msg) + + try: + create_type = data.format_specific['data']['create-type'] + except KeyError: + msg = _('Unable to determine VMDK create-type') + raise exception.ImageUnacceptable(image_id=image_id, reason=msg) + + if create_type not in CONF.compute.vmdk_allowed_types: + LOG.warning('Refusing to process VMDK file with create-type of %r ' + 'which is not in allowed set of: %s', create_type, + ','.join(CONF.compute.vmdk_allowed_types)) + msg = _('Invalid VMDK create-type specified') + raise exception.ImageUnacceptable(image_id=image_id, reason=msg) + + def fetch_to_raw(context, image_href, path, trusted_certs=None): path_tmp = "%s.part" % path fetch(context, image_href, path_tmp, trusted_certs) @@ -129,6 +157,9 @@ def fetch_to_raw(context, image_href, path, trusted_certs=None): reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") % {'fmt': fmt, 'backing_file': backing_file})) + if fmt == 'vmdk': + check_vmdk_image(image_href, data) + if fmt != "raw" and CONF.force_raw_images: staged = "%s.converted" % path LOG.debug("%s was %s, converting to raw", image_href, fmt) From c07495d9d64dd0635d72fc7ff67d73a656a40d13 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Tue, 26 Jul 2022 16:02:17 +0200 Subject: [PATCH 4/4] Add a workaround to skip hypervisor version check on LM When turned on, this will disable the version-checking of hypervisors during live-migration. This can be useful for operators in certain scenarios when upgrading. E.g. if you want to relocate all instances off a compute node due to an emergency hardware issue, and you only have another old compute node ready at the time. Note, though: libvirt will do its own internal compatibility checks, and might still reject live migration if the destination is incompatible. Closes-Bug: #1982853 Change-Id: Iec387dcbc49ddb91ebf5cfd188224eaf6021c0e1 Signed-off-by: Kashyap Chamarthy (cherry picked from commit 00ed8a232bc22f48011e95a0b47750520a5b4d47) --- nova/conductor/tasks/live_migrate.py | 5 ++-- nova/conf/workarounds.py | 7 +++++ .../unit/conductor/tasks/test_live_migrate.py | 30 +++++++++++++++++++ ...-version-check-on-lm-a87f2dcb4f8bf0f2.yaml | 13 ++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/skip-hypervisor-version-check-on-lm-a87f2dcb4f8bf0f2.yaml diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 1acae88b264..f8819b0dc85 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -347,8 +347,9 @@ def _check_compatible_with_source_hypervisor(self, destination): source_version = source_info.hypervisor_version destination_version = destination_info.hypervisor_version - if source_version > destination_version: - raise exception.DestinationHypervisorTooOld() + if not CONF.workarounds.skip_hypervisor_version_check_on_lm: + if source_version > destination_version: + raise exception.DestinationHypervisorTooOld() return source_info, destination_info def _call_livem_checks_on_host(self, destination, provider_mapping): diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 6c52eae8e5d..2ec53282cdb 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -409,6 +409,13 @@ with the destination host. When using QEMU >= 2.9 and libvirt >= 4.4.0, libvirt will do the correct thing with respect to checking CPU compatibility on the destination host during live migration. +"""), + cfg.BoolOpt( + 'skip_hypervisor_version_check_on_lm', + default=False, + help=""" +When this is enabled, it will skip version-checking of hypervisors +during live migration. """), ] diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index cb40c076c82..dd4ee7c3fec 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -345,6 +345,36 @@ def test_check_compatible_fails_with_hypervisor_too_old( mock.call(self.destination)], mock_get_info.call_args_list) + @mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info') + def test_skip_hypervisor_version_check_on_lm_raise_ex(self, mock_get_info): + host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7} + host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6} + self.flags(group='workarounds', + skip_hypervisor_version_check_on_lm=False) + mock_get_info.side_effect = [objects.ComputeNode(**host1), + objects.ComputeNode(**host2)] + self.assertRaises(exception.DestinationHypervisorTooOld, + self.task._check_compatible_with_source_hypervisor, + self.destination) + self.assertEqual([mock.call(self.instance_host), + mock.call(self.destination)], + mock_get_info.call_args_list) + + @mock.patch.object(live_migrate.LiveMigrationTask, '_get_compute_info') + def test_skip_hypervisor_version_check_on_lm_do_not_raise_ex( + self, mock_get_info + ): + host1 = {'hypervisor_type': 'a', 'hypervisor_version': 7} + host2 = {'hypervisor_type': 'a', 'hypervisor_version': 6} + self.flags(group='workarounds', + skip_hypervisor_version_check_on_lm=True) + mock_get_info.side_effect = [objects.ComputeNode(**host1), + objects.ComputeNode(**host2)] + self.task._check_compatible_with_source_hypervisor(self.destination) + self.assertEqual([mock.call(self.instance_host), + mock.call(self.destination)], + mock_get_info.call_args_list) + @mock.patch.object(compute_rpcapi.ComputeAPI, 'check_can_live_migrate_destination') def test_check_requested_destination(self, mock_check): diff --git a/releasenotes/notes/skip-hypervisor-version-check-on-lm-a87f2dcb4f8bf0f2.yaml b/releasenotes/notes/skip-hypervisor-version-check-on-lm-a87f2dcb4f8bf0f2.yaml new file mode 100644 index 00000000000..00fe6a24c70 --- /dev/null +++ b/releasenotes/notes/skip-hypervisor-version-check-on-lm-a87f2dcb4f8bf0f2.yaml @@ -0,0 +1,13 @@ +--- +feature: + - | + Adds a workaround that allows one to disable hypervisor + version-check on live migration. This workaround option can be + useful in certain scenarios when upgrading. E.g. if you want to + relocate all instances off a compute node due to an emergency + hardware issue, and you only have another old compute node ready at + the time. + + To enable this, use the config attribute + ``[workarounds]skip_hypervisor_version_check_on_lm=True`` in + ``nova.conf``. The option defaults to ``False``.