From c02e213d507c830427a86d6a4bb4f7a2f5158590 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Fri, 14 Dec 2018 17:23:00 +0100
Subject: [PATCH] Ensure that bandwidth and VF are from the same PF

A neutron port can be created with the direct vnic type and can also have
a bandwidth resource request at the same time. In this case placement
offers allocation candidates that can fulfill the bandwidth request, and
the nova scheduler's PciPassthroughFilter then checks if a PCI device, a
VF, is available for that request. This check is based on the physnet of
the neutron port and the physical_network tag in the
pci/passthrough_whitelist config. It does not consider the actual PF
providing the bandwidth.

The currently unsupported case is when a single compute node has
whitelisted VFs from more than one PF connected to the same physnet.
These PFs can have totally different bandwidth inventories in placement.
For example PF2 can have plenty of bandwidth available while PF3 has no
bandwidth configured at all. In this case the PciPassthroughFilter might
accept the host simply because PF3 still has available VFs, even though
the bandwidth for the port is fulfilled from PF2, which in turn might not
have any available VFs left. Moreover the PCI claim applies the same
logic as the filter, so it would claim the VF from PF3 while the
bandwidth was allocated from PF2 in placement.

This patch does not try to solve the issue in the PciPassthroughFilter
but it does solve the issue in the PCI claim. This means that after
successful scheduling the PCI claim can still fail if bandwidth is
allocated from one PF but a VF is no longer available from that specific
PF. This will lead to a re-schedule.

Making the PciPassthroughFilter smart enough is complicated because:

* The filters do not know about placement allocation candidates at all.
* The filters work per compute host, not per allocation candidate. If
  there are two allocation candidates for the same host then nova will
  only try to filter for the first one. [1][2]

This patch applies the following logic: The compute manager checks the
InstancePCIRequest OVOs in a given boot request and maps each of them to
the neutron port that requested the PCI device. Then it maps the neutron
port to the physical device RP in the placement allocation made for this
server. Then the spec in the InstancePCIRequest is extended with the
interface name of the PF from which the bandwidth was allocated, based on
the name of the device RP. The PCI claim then enforces that the PF
interface name in the request matches the interface name of the PF from
which the VF is selected.

The PCI claim code knows the PF interface name of each available VF
because the virt driver reports the 'parent_ifname' key as part of the
return value of the get_available_resource() driver call. The PCI claim
process itself is not changed as it already enforces that every field of
the request matches the fields of the selected device pool.

The current patch extends the libvirt driver to provide the PF interface
name information. Besides the libvirt driver the xenapi driver also
supports SRIOV VF handling, but this patch does not extend the xenapi
driver. So for xenapi the above described configuration remains
unsupported.

I know that this feels complicated but it is necessary because VFs are
not counted as resources in placement yet.
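To make the mapping concrete, the following minimal sketch (not part of
the change; the helper names and the example RP name are made up for
illustration) shows the shape of the spec-update step described above:

    # Illustrative sketch only; the real logic lives in ComputeManager.
    def pf_ifname_from_rp_name(rp_name):
        # Neutron names the device RP <hostname>:<agentname>:<interfacename>,
        # e.g. 'compute1:NIC Switch agent:ens2' (example value).
        pieces = rp_name.split(':')
        if len(pieces) != 3:
            raise ValueError('unexpected RP name format: %s' % rp_name)
        return pieces[2]

    def update_pci_request_specs(pci_requests, requester_to_rp_name):
        # Extend each PCI request that is backed by a bandwidth allocation
        # with the PF interface name so that the PCI claim can only select
        # a VF whose parent PF matches the bandwidth providing device RP.
        for pci_request in pci_requests:
            rp_name = requester_to_rp_name.get(pci_request['requester_id'])
            if rp_name is None:
                continue
            for spec in pci_request['spec']:
                spec['parent_ifname'] = pf_ifname_from_rp_name(rp_name)

    requests = [{'requester_id': 'port-1',
                 'spec': [{'vendor_id': '1377', 'product_id': '0047'}]}]
    update_pci_request_specs(
        requests, {'port-1': 'compute1:NIC Switch agent:ens2'})
    assert requests[0]['spec'][0]['parent_ifname'] == 'ens2'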
[1] https://github.com/openstack/nova/blob/f6996903d2ef0fdb40135b506c83ed6517b28e19/nova/scheduler/filter_scheduler.py#L239
[2] https://github.com/openstack/nova/blob/f6996903d2ef0fdb40135b506c83ed6517b28e19/nova/scheduler/filter_scheduler.py#L426

blueprint: bandwidth-resource-provider
Change-Id: I038867c4094d79ae4a20615ab9c9f9e38fcc2e0a
---
 nova/compute/manager.py                       |  62 +++++++++
 nova/pci/stats.py                             |   8 ++
 nova/scheduler/client/report.py               |  21 +++
 nova/tests/fixtures.py                        |  21 ++-
 nova/tests/functional/test_servers.py         | 116 +++++++++++++++-
 nova/tests/unit/compute/test_compute_mgr.py   | 124 ++++++++++++++++++
 .../unit/scheduler/client/test_report.py      |  54 ++++++++
 nova/tests/unit/virt/libvirt/fakelibvirt.py   |   6 +
 nova/tests/unit/virt/libvirt/test_driver.py   |  26 +++-
 nova/virt/fake.py                             |   9 +-
 nova/virt/libvirt/driver.py                   |   3 +
 11 files changed, 440 insertions(+), 10 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5192573da2b..31c6be60bcb 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2115,6 +2115,61 @@ def _get_request_group_mapping(request_spec):
         else:
             return None
 
+    def _update_pci_request_spec_with_allocated_interface_name(
+            self, context, instance,
+            request_group_resource_providers_mapping):
+        if not instance.pci_requests:
+            return
+
+        def needs_update(pci_request, mapping):
+            return (pci_request.requester_id
+                    and pci_request.requester_id in mapping)
+
+        modified = False
+        for pci_request in instance.pci_requests.requests:
+            if needs_update(
+                    pci_request, request_group_resource_providers_mapping):
+
+                provider_uuids = request_group_resource_providers_mapping[
+                    pci_request.requester_id]
+
+                if len(provider_uuids) != 1:
+                    reason = (
+                        'Allocating resources from more than one resource '
+                        'provider %(providers)s for a single pci request '
+                        '%(requester)s is not supported.' %
+                        {'providers': provider_uuids,
+                         'requester': pci_request.requester_id})
+                    raise exception.BuildAbortException(
+                        instance_uuid=instance.uuid,
+                        reason=reason)
+
+                dev_rp_name = self.reportclient.get_resource_provider_name(
+                    context,
+                    provider_uuids[0])
+
+                # NOTE(gibi): the device RP name reported by neutron is
+                # structured like <hostname>:<agentname>:<interfacename>
+                rp_name_pieces = dev_rp_name.split(':')
+                if len(rp_name_pieces) != 3:
+                    reason = (
+                        'Resource provider %(provider)s used to allocate '
+                        'resources for the pci request %(requester)s does '
+                        'not have a properly formatted name. Expected name '
+                        'format is <hostname>:<agentname>:<interfacename>, '
+                        'but got %(provider_name)s' %
+                        {'provider': provider_uuids[0],
+                         'requester': pci_request.requester_id,
+                         'provider_name': dev_rp_name})
+                    raise exception.BuildAbortException(
+                        instance_uuid=instance.uuid,
+                        reason=reason)
+
+                for spec in pci_request.spec:
+                    spec['parent_ifname'] = rp_name_pieces[2]
+                modified = True
+        if modified:
+            instance.save()
+
     def _build_and_run_instance(self, context, instance, image, injected_files,
             admin_password, requested_networks, security_groups,
             block_device_mapping, node, limits, filter_properties,
@@ -2136,6 +2191,13 @@ def _build_and_run_instance(self, context, instance, image, injected_files,
         self._check_device_tagging(requested_networks, block_device_mapping)
         self._check_trusted_certs(instance)
 
+        request_group_resource_providers_mapping = \
+            self._get_request_group_mapping(request_spec)
+
+        if request_group_resource_providers_mapping:
+            self._update_pci_request_spec_with_allocated_interface_name(
+                context, instance, request_group_resource_providers_mapping)
+
         try:
             scheduler_hints = self._get_scheduler_hints(filter_properties,
                                                         request_spec)
diff --git a/nova/pci/stats.py b/nova/pci/stats.py
index 40736ef726b..dcf26d3f74e 100644
--- a/nova/pci/stats.py
+++ b/nova/pci/stats.py
@@ -95,6 +95,14 @@ def _create_pool_keys_from_dev(self, dev):
         pool = {k: getattr(dev, k) for k in self.pool_keys}
         if tags:
             pool.update(tags)
+        # NOTE(gibi): parent_ifname acts like a tag during pci claim but
+        # is not provided as part of the whitelist spec as it is auto
+        # detected by the virt driver.
+        # This key is used to match InstancePciRequests backed by neutron
+        # ports that have a resource_request and therefore already have a
+        # resource allocation in placement.
+        if dev.extra_info.get('parent_ifname'):
+            pool['parent_ifname'] = dev.extra_info['parent_ifname']
         return pool
 
     def add_device(self, dev):
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py
index 0cdd2b0ac64..609b3718952 100644
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -400,6 +400,27 @@ def get_provider_traits(self, context, rp_uuid):
                    'status_code': resp.status_code, 'err_text': resp.text})
         raise exception.ResourceProviderTraitRetrievalFailed(uuid=rp_uuid)
 
+    def get_resource_provider_name(self, context, uuid):
+        """Return the name of a RP. It tries to use the internal cache of
+        RPs or falls back to calling placement directly.
+
+        :param context: The security context
+        :param uuid: UUID identifier for the resource provider to look up
+        :return: The name of the RP
+        :raise: ResourceProviderRetrievalFailed if the RP is not in the
+            cache and the communication with placement failed.
+        :raise: ResourceProviderNotFound if the RP does not exist.
+ """ + + try: + return self._provider_tree.data(uuid).name + except ValueError: + rsp = self._get_resource_provider(context, uuid) + if rsp is None: + raise exception.ResourceProviderNotFound(name_or_uuid=uuid) + else: + return rsp['name'] + @safe_connect def _get_resource_provider(self, context, uuid): """Queries the placement API for a resource provider record with the diff --git a/nova/tests/fixtures.py b/nova/tests/fixtures.py index 0ee89734541..b727f54cc57 100644 --- a/nova/tests/fixtures.py +++ b/nova/tests/fixtures.py @@ -1311,16 +1311,33 @@ class NeutronFixture(fixtures.Fixture): } network_2['subnets'] = [subnet_2['id']] + sriov_port = { + 'id': '5460ee0c-ffbb-4e45-8d58-37bfceabd084', + 'network_id': network_2['id'], + 'admin_state_up': True, + 'status': 'ACTIVE', + 'mac_address': '52:54:00:1e:59:c4', + 'fixed_ips': [ + { + 'ip_address': '192.168.13.2', + 'subnet_id': subnet_2['id'] + } + ], + 'tenant_id': tenant_id, + 'resource_request': {}, + 'binding:vnic_type': 'direct', + } + port_with_sriov_resource_request = { 'id': '7059503b-a648-40fd-a561-5ca769304bee', 'network_id': network_2['id'], 'admin_state_up': True, 'status': 'ACTIVE', - 'mac_address': '52:54:00:1e:59:c4', + 'mac_address': '52:54:00:1e:59:c5', # Do neutron really adds fixed_ips to an direct vnic_type port? 'fixed_ips': [ { - 'ip_address': '192.168.13.2', + 'ip_address': '192.168.13.3', 'subnet_id': subnet_2['id'] } ], diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 6cbc6e3a6ee..1ef40e5aa75 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -5924,13 +5924,15 @@ def setUp(self): self._create_networking_rp_tree(self.compute1_rp_uuid) - # add an extra port and the related network to the neutron fixture + # add extra ports and the related network to the neutron fixture # specifically for these tests. It cannot be added globally in the # fixture init as it adds a second network that makes auto allocation # based test to fail due to ambiguous networks. self.neutron._ports[ self.neutron.port_with_sriov_resource_request['id']] = \ copy.deepcopy(self.neutron.port_with_sriov_resource_request) + self.neutron._ports[self.neutron.sriov_port['id']] = \ + copy.deepcopy(self.neutron.sriov_port) self.neutron._networks[ self.neutron.network_2['id']] = self.neutron.network_2 self.neutron._subnets[ @@ -6498,6 +6500,118 @@ def test_interface_detach_with_port_with_bandwidth_request(self): binding_profile = updated_port['binding:profile'] self.assertNotIn('allocation', binding_profile) + def test_two_sriov_ports_one_with_request_two_available_pfs(self): + """Verify that the port's bandwidth allocated from the same PF as + the allocated VF. + + One compute host: + * PF1 (0000:01:00) is configured for physnet1 + * PF2 (0000:02:00) is configured for physnet2, with 1 VF and bandwidth + inventory + * PF3 (0000:03:00) is configured for physnet2, with 1 VF but without + bandwidth inventory + + One instance will be booted with two neutron ports, both ports + requested to be connected to physnet2. One port has resource request + the other does not have resource request. The port having the resource + request cannot be allocated to PF3 and PF1 while the other port that + does not have resource request can be allocated to PF2 or PF3. + + For the detailed compute host config see the FakeDriverWithPciResources + class. For the necessary passthrough_whitelist config see the setUp of + the PortResourceRequestBasedSchedulingTestBase class. 
+ """ + + sriov_port = self.neutron.sriov_port + sriov_port_with_res_req = self.neutron.port_with_sriov_resource_request + server = self._create_server( + flavor=self.flavor_with_group_policy, + networks=[ + {'port': sriov_port_with_res_req['id']}, + {'port': sriov_port['id']}]) + + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + sriov_port = self.neutron.show_port(sriov_port['id'])['port'] + sriov_port_with_res_req = self.neutron.show_port( + sriov_port_with_res_req['id'])['port'] + + allocations = self.placement_api.get( + '/allocations/%s' % server['id']).body['allocations'] + + # We expect one set of allocations for the compute resources on the + # compute rp and one set for the networking resources on the sriov PF2 + # rp. + self.assertEqual(2, len(allocations)) + compute_allocations = allocations[self.compute1_rp_uuid]['resources'] + self.assertEqual( + self._resources_from_flavor(self.flavor_with_group_policy), + compute_allocations) + + sriov_allocations = allocations[self.sriov_pf2_rp_uuid]['resources'] + self.assertPortMatchesAllocation( + sriov_port_with_res_req, sriov_allocations) + + # We expect that only the RP uuid of the networking RP having the port + # allocation is sent in the port binding for the port having resource + # request + sriov_with_req_binding = sriov_port_with_res_req['binding:profile'] + self.assertEqual( + self.sriov_pf2_rp_uuid, sriov_with_req_binding['allocation']) + # and the port without resource request does not have allocation + sriov_binding = sriov_port['binding:profile'] + self.assertNotIn('allocation', sriov_binding) + + # We expect that the selected PCI device matches with the RP from + # where the bandwidth is allocated from. The bandwidth is allocated + # from 0000:02:00 (PF2) so the PCI device should be a VF of that PF + self.assertEqual('0000:02:00.1', sriov_with_req_binding['pci_slot']) + # But also the port that has no resource request still gets a pci slot + # allocated. The 0000:02:00 has no more VF available but 0000:03:00 has + # one VF available and that PF is also on physnet2 + self.assertEqual('0000:03:00.1', sriov_binding['pci_slot']) + + def test_one_sriov_port_no_vf_and_bandwidth_available_on_the_same_pf(self): + """Verify that if there is no PF that both provides bandwidth and VFs + then the boot will fail. + """ + + # boot a server with a single sriov port that has no resource request + sriov_port = self.neutron.sriov_port + server = self._create_server( + flavor=self.flavor_with_group_policy, + networks=[{'port': sriov_port['id']}]) + + self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + sriov_port = self.neutron.show_port(sriov_port['id'])['port'] + sriov_binding = sriov_port['binding:profile'] + + # We expect that this consume the last available VF from the PF2 + self.assertEqual('0000:02:00.1', sriov_binding['pci_slot']) + + # Now boot a second server with a port that has resource request + # At this point PF2 has available bandwidth but no available VF + # and PF3 has available VF but no available bandwidth so we expect + # the boot to fail. + + sriov_port_with_res_req = self.neutron.port_with_sriov_resource_request + server = self._create_server( + flavor=self.flavor_with_group_policy, + networks=[{'port': sriov_port_with_res_req['id']}]) + + # NOTE(gibi): It should be NoValidHost in an ideal world but that would + # require the scheduler to detect the situation instead of the pci + # claim. 
However that is pretty hard as the scheduler does not know + # anything about allocation candidates (e.g. that the only candidate + # for the port in this case is PF2) it see the whole host as a + # candidate and in our host there is available VF for the request even + # if that is on the wrong PF. + server = self._wait_for_state_change(self.admin_api, server, 'ERROR') + self.assertIn( + 'Exceeded maximum number of retries. Exhausted all hosts ' + 'available for retrying build failures for instance', + server['fault']['message']) + class PortResourceRequestReSchedulingTestIgnoreMicroversionCheck( PortResourceRequestBasedSchedulingTestBase): diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 319f4b69d53..04e7884f2c1 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -4960,6 +4960,7 @@ def setUp(self): vm_state=vm_states.ACTIVE, expected_attrs=['metadata', 'system_metadata', 'info_cache']) self.instance.trusted_certs = None # avoid lazy-load failures + self.instance.pci_requests = None # avoid lazy-load failures self.admin_pass = 'pass' self.injected_files = [] self.image = {} @@ -6637,6 +6638,129 @@ def test_build_with_resource_request_in_the_request_spec(self): self.context, self.instance, self.requested_networks, self.security_groups, {uuids.port1: [uuids.rp1]}) + def test_build_with_resource_request_sriov_port(self): + request_spec = objects.RequestSpec( + requested_resources=[ + objects.RequestGroup( + requester_id=uuids.port1, + provider_uuids=[uuids.rp1])]) + + # NOTE(gibi): the first request will not match to any request group + # this is the case when the request is not coming from a Neutron port + # but from flavor or when the instance is old enough that the + # requester_id field is not filled. + # The second request will match with the request group in the request + # spec and will trigger an update on that pci request. + # The third request is coming from a Neutron port which doesn't have + # resource request and therefore no matching request group exists in + # the request spec. + self.instance.pci_requests = objects.InstancePCIRequests(requests=[ + objects.InstancePCIRequest(), + objects.InstancePCIRequest( + requester_id=uuids.port1, + spec=[{'vendor_id': '1377', 'product_id': '0047'}]), + objects.InstancePCIRequest(requester_id=uuids.port2), + ]) + with test.nested( + mock.patch.object(self.compute.driver, 'spawn'), + mock.patch.object(self.compute, + '_build_networks_for_instance', return_value=[]), + mock.patch.object(self.instance, 'save'), + mock.patch('nova.scheduler.client.report.' + 'SchedulerReportClient._get_resource_provider'), + ) as (mock_spawn, mock_networks, mock_save, mock_get_rp): + mock_get_rp.return_value = { + 'uuid': uuids.rp1, + 'name': 'compute1:sriov-agent:ens3' + } + self.compute._build_and_run_instance( + self.context, + self.instance, self.image, self.injected_files, + self.admin_pass, self.requested_networks, + self.security_groups, self.block_device_mapping, self.node, + self.limits, self.filter_properties, request_spec) + + mock_networks.assert_called_once_with( + self.context, self.instance, self.requested_networks, + self.security_groups, {uuids.port1: [uuids.rp1]}) + mock_get_rp.assert_called_once_with(self.context, uuids.rp1) + # As the second pci request matched with the request group from the + # request spec. So that pci request is extended with the + # parent_ifname calculated from the corresponding RP name. 
+        self.assertEqual(
+            [{'parent_ifname': 'ens3',
+              'vendor_id': '1377',
+              'product_id': '0047'}],
+            self.instance.pci_requests.requests[1].spec)
+        # the rest of the pci requests are unchanged
+        self.assertNotIn('spec', self.instance.pci_requests.requests[0])
+        self.assertNotIn('spec', self.instance.pci_requests.requests[2])
+
+    def test_build_with_resource_request_sriov_rp_not_found(self):
+        request_spec = objects.RequestSpec(
+            requested_resources=[
+                objects.RequestGroup(
+                    requester_id=uuids.port1,
+                    provider_uuids=[uuids.rp1])])
+
+        self.instance.pci_requests = objects.InstancePCIRequests(requests=[
+            objects.InstancePCIRequest(requester_id=uuids.port1)])
+        with mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                        '_get_resource_provider') as (mock_get_rp):
+            mock_get_rp.return_value = None
+
+            self.assertRaises(
+                exception.ResourceProviderNotFound,
+                self.compute._build_and_run_instance,
+                self.context,
+                self.instance, self.image, self.injected_files,
+                self.admin_pass, self.requested_networks,
+                self.security_groups, self.block_device_mapping, self.node,
+                self.limits, self.filter_properties, request_spec)
+
+    def test_build_with_resource_request_sriov_rp_wrongly_formatted_name(
+            self):
+        request_spec = objects.RequestSpec(
+            requested_resources=[
+                objects.RequestGroup(
+                    requester_id=uuids.port1,
+                    provider_uuids=[uuids.rp1])])
+
+        self.instance.pci_requests = objects.InstancePCIRequests(requests=[
+            objects.InstancePCIRequest(requester_id=uuids.port1)])
+        with mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                        '_get_resource_provider') as (mock_get_rp):
+            mock_get_rp.return_value = {
+                'uuid': uuids.rp1,
+                'name': 'my-awesome-rp'
+            }
+            self.assertRaises(
+                exception.BuildAbortException,
+                self.compute._build_and_run_instance,
+                self.context,
+                self.instance, self.image, self.injected_files,
+                self.admin_pass, self.requested_networks,
+                self.security_groups, self.block_device_mapping, self.node,
+                self.limits, self.filter_properties, request_spec)
+
+    def test_build_with_resource_request_more_than_one_providers(self):
+        request_spec = objects.RequestSpec(
+            requested_resources=[
+                objects.RequestGroup(
+                    requester_id=uuids.port1,
+                    provider_uuids=[uuids.rp1, uuids.rp2])])
+
+        self.instance.pci_requests = objects.InstancePCIRequests(requests=[
+            objects.InstancePCIRequest(requester_id=uuids.port1)])
+
+        self.assertRaises(
+            exception.BuildAbortException,
+            self.compute._build_and_run_instance,
+            self.context,
+            self.instance, self.image, self.injected_files,
+            self.admin_pass, self.requested_networks,
+            self.security_groups, self.block_device_mapping, self.node,
+            self.limits, self.filter_properties, request_spec)
+
 
 class ComputeManagerErrorsOutMigrationTestCase(test.NoDBTestCase):
     def setUp(self):
diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py
index a2797c24794..23bc1307ed4 100644
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@@ -2602,6 +2602,60 @@ def test_set_aggregates_for_provider_no_short_circuit(self):
         self.assertEqual(set(), ptree_data.aggregates)
         self.assertEqual(5, ptree_data.generation)
 
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                '_get_resource_provider', return_value=mock.NonCallableMock)
+    def test_get_resource_provider_name_from_cache(self, mock_placement_get):
+        expected_name = 'rp'
+        self.client._provider_tree.new_root(
+            expected_name, uuids.rp, generation=0)
+
+        actual_name = self.client.get_resource_provider_name(
+            self.context, uuids.rp)
+
+        self.assertEqual(expected_name, actual_name)
+
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                '_get_resource_provider')
+    def test_get_resource_provider_name_from_placement(
+            self, mock_placement_get):
+        expected_name = 'rp'
+        mock_placement_get.return_value = {
+            'uuid': uuids.rp,
+            'name': expected_name
+        }
+
+        actual_name = self.client.get_resource_provider_name(
+            self.context, uuids.rp)
+
+        self.assertEqual(expected_name, actual_name)
+        mock_placement_get.assert_called_once_with(self.context, uuids.rp)
+
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                '_get_resource_provider')
+    def test_get_resource_provider_name_rp_not_found_in_placement(
+            self, mock_placement_get):
+        mock_placement_get.side_effect = \
+            exception.ResourceProviderNotFound(uuids.rp)
+
+        self.assertRaises(
+            exception.ResourceProviderNotFound,
+            self.client.get_resource_provider_name,
+            self.context, uuids.rp)
+
+        mock_placement_get.assert_called_once_with(self.context, uuids.rp)
+
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                '_get_resource_provider')
+    def test_get_resource_provider_name_placement_unavailable(
+            self, mock_placement_get):
+        mock_placement_get.side_effect = \
+            exception.ResourceProviderRetrievalFailed(uuid=uuids.rp)
+
+        self.assertRaises(
+            exception.ResourceProviderRetrievalFailed,
+            self.client.get_resource_provider_name,
+            self.context, uuids.rp)
+
 
 class TestAggregates(SchedulerReportClientTestCase):
     def test_get_provider_aggregates_found(self):
diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py
index dbd5a84e4e5..6adf4f6594f 100644
--- a/nova/tests/unit/virt/libvirt/fakelibvirt.py
+++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py
@@ -1552,6 +1552,12 @@ def setUp(self):
         self.useFixture(
             fixtures.MockPatch('nova.virt.libvirt.utils.get_fs_info'))
 
+        # The libvirt driver needs to call out to the filesystem to get
+        # the parent_ifname for the SRIOV VFs.
+        self.useFixture(fixtures.MockPatch(
+            'nova.pci.utils.get_ifname_by_pci_address',
+            return_value='fake_pf_interface_name'))
+
         # Don't assume that the system running tests has a valid machine-id
         self.useFixture(fixtures.MockPatch(
             'nova.virt.libvirt.driver.LibvirtDriver'
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 074733b4299..18e77216044 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -15032,7 +15032,9 @@ def test_get_pcinet_info(self):
         mock_get_net_name.called_once_with(parent_address)
         mock_dev_lookup.called_once_with(dev_name)
 
-    def test_get_pcidev_info(self):
+    @mock.patch.object(pci_utils, 'get_ifname_by_pci_address',
+                       return_value='ens1')
+    def test_get_pcidev_info(self, mock_get_ifname):
         self.stub_out('nova.virt.libvirt.host.Host.device_lookup_by_name',
                       lambda self, name: FakeNodeDevice(
                           _fake_NodeDevXml[name]))
@@ -15061,6 +15063,7 @@ def test_get_pcidev_info(self):
             "label": 'label_8086_1520',
             "dev_type": fields.PciDeviceType.SRIOV_VF,
             "parent_addr": '0000:04:00.3',
+            "parent_ifname": "ens1",
         }
         self.assertEqual(expect_vf, actualvf)
 
@@ -15079,6 +15082,7 @@ def test_get_pcidev_info(self):
             "capabilities": {
                 "network": ["rx", "tx", "sg", "tso",
                             "gso", "gro", "rxvlan", "txvlan"]},
+            "parent_ifname": "ens1",
         }
         self.assertEqual(expect_vf, actualvf)
 
@@ -15147,10 +15151,12 @@ def test_list_devices_not_supported(self):
         self.assertRaises(fakelibvirt.libvirtError,
                           drvr._get_pci_passthrough_devices)
 
+    @mock.patch.object(pci_utils, 'get_ifname_by_pci_address',
+                       return_value='ens1')
     @mock.patch.object(host.Host, 'list_pci_devices',
                        return_value=['pci_0000_04_00_3', 'pci_0000_04_10_7',
                                      'pci_0000_04_11_7'])
-    def test_get_pci_passthrough_devices(self, mock_list):
+    def test_get_pci_passthrough_devices(self, mock_list, mock_get_ifname):
         self.stub_out('nova.virt.libvirt.host.Host.device_lookup_by_name',
                       lambda self, name: FakeNodeDevice(
                           _fake_NodeDevXml[name]))
@@ -15176,7 +15182,9 @@ def test_get_pci_passthrough_devices(self, mock_list):
                     "numa_node": None,
                     "dev_type": fields.PciDeviceType.SRIOV_VF,
                     "phys_function": [('0x0000', '0x04', '0x00', '0x3')],
-                    "parent_addr": "0000:04:00.3"},
+                    "parent_addr": "0000:04:00.3",
+                    "parent_ifname": "ens1",
+                },
                 {
                     "dev_id": "pci_0000_04_11_7",
                     "domain": 0,
@@ -15186,7 +15194,8 @@ def test_get_pci_passthrough_devices(self, mock_list):
                     "numa_node": 0,
                     "dev_type": fields.PciDeviceType.SRIOV_VF,
                     "phys_function": [('0x0000', '0x04', '0x00', '0x3')],
-                    "parent_addr": "0000:04:00.3"
+                    "parent_addr": "0000:04:00.3",
+                    "parent_ifname": "ens1",
                 }
             ]
 
@@ -15197,6 +15206,15 @@ def test_get_pci_passthrough_devices(self, mock_list):
                 self.assertEqual(expectvfs[dev][key], actualvfs[dev][key])
 
         mock_list.assert_called_once_with()
+        # The first call for every VF is to determine parent_ifname and
+        # the second call is to determine the MAC address.
+        mock_get_ifname.assert_has_calls([
+            mock.call('0000:04:10.7', pf_interface=True),
+            mock.call('0000:04:10.7', False),
+            mock.call('0000:04:11.7', pf_interface=True),
+            mock.call('0000:04:11.7', False)
+        ])
+
     # TODO(stephenfin): This only has one caller. Flatten it and remove the
     # 'mempages=False' branches or add the missing test
     def _test_get_host_numa_topology(self, mempages):
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 655dd6c5ce8..7b88ccb5c45 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -903,13 +903,13 @@ def setUp(self):
     def get_available_resource(self, nodename):
         host_status = super(
             FakeDriverWithPciResources, self).get_available_resource(nodename)
-        # 01:00 - PF
+        # 01:00 - PF - ens1
         #  |---- 01:00.1 - VF
         #
-        # 02:00 - PF
+        # 02:00 - PF - ens2
         #  |---- 02:00.1 - VF
         #
-        # 03:00 - PF
+        # 03:00 - PF - ens3
         #  |---- 03:00.1 - VF
         host_status['pci_passthrough_devices'] = jsonutils.dumps([
             {
@@ -931,6 +931,7 @@ def get_available_resource(self, nodename):
                 'parent_addr': '0000:01:00',
                 'numa_node': 0,
                 'label': 'fake-label',
+                "parent_ifname": "ens1",
             },
             {
                 'address': '0000:02:00.0',
@@ -951,6 +952,7 @@ def get_available_resource(self, nodename):
                 'parent_addr': '0000:02:00',
                 'numa_node': 0,
                 'label': 'fake-label',
+                "parent_ifname": "ens2",
             },
             {
                 'address': '0000:03:00.0',
@@ -971,6 +973,7 @@ def get_available_resource(self, nodename):
                 'parent_addr': '0000:03:00',
                 'numa_node': 0,
                 'label': 'fake-label',
+                "parent_ifname": "ens3",
             },
         ])
         return host_status
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 9be5bf691f3..20beb23e45e 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -5935,6 +5935,9 @@ def _get_device_type(cfgdev, pci_address):
                 return {
                     'dev_type': fields.PciDeviceType.SRIOV_VF,
                     'parent_addr': phys_address,
+                    'parent_ifname':
+                        pci_utils.get_ifname_by_pci_address(
+                            pci_address, pf_interface=True),
                 }
             return {'dev_type': fields.PciDeviceType.STANDARD}
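
For reviewers, here is a small self-contained illustration (not part of
the patch; the pool dicts are simplified stand-ins for the pools built in
nova/pci/stats.py) of why adding parent_ifname to both the device pools
and the request spec forces the VF and the bandwidth onto the same PF:

    # Toy model of the claim-side matching. A pool satisfies a request
    # spec only if every key/value in the spec matches the pool, so
    # parent_ifname acts as an extra tag that pins the VF to one PF.
    pools = [
        {'vendor_id': '1377', 'product_id': '0047',
         'physical_network': 'physnet2', 'parent_ifname': 'ens2',
         'count': 0},  # PF2: bandwidth allocated here, but no free VF
        {'vendor_id': '1377', 'product_id': '0047',
         'physical_network': 'physnet2', 'parent_ifname': 'ens3',
         'count': 1},  # PF3: free VF, but no bandwidth inventory
    ]

    def matching_pools(pools, spec):
        return [pool for pool in pools
                if all(pool.get(k) == v for k, v in spec.items())]

    # Without parent_ifname the claim would happily take PF3's VF:
    assert matching_pools(pools, {'physical_network': 'physnet2'}) == pools
    # With parent_ifname set from the bandwidth allocation only PF2
    # matches, and since it has no free VF the claim fails and the boot
    # is re-scheduled:
    assert matching_pools(
        pools,
        {'physical_network': 'physnet2', 'parent_ifname': 'ens2'}
    ) == [pools[0]]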