From 6b3b04d2113915729fd9aced9839338e429e1a01 Mon Sep 17 00:00:00 2001 From: Sergey Nikitin Date: Tue, 25 Oct 2016 17:24:36 +0300 Subject: [PATCH] Add PCI NUMA policies This patch adds new policies for PCI devices allocation. There are 3 policies: - legacy - this is the default value and it describes the current nova behavior. Nova will boot VMs with PCI device if the PCI device is associated with at least one NUMA node on which the instance should be booted or there is no information about PCI-NUMA association - required - nova will boot VMs with PCI devices *only* if at least one of the VM's NUMA node is associated with these PCI devices. - preferred - nova will boot VMs using best effort NUMA affinity bp share-pci-between-numa-nodes Change-Id: I46d483f9de6776db1b025f925890624e5e682ada Co-Authored-By: Stephen Finucane --- nova/conf/pci.py | 32 ++- nova/pci/request.py | 141 +++++++----- nova/pci/stats.py | 152 ++++++++++--- nova/tests/unit/pci/test_request.py | 114 ++++++---- nova/tests/unit/pci/test_stats.py | 203 +++++++++++++++--- ...i-between-numa-nodes-0bd206eeca4ebcde.yaml | 17 ++ 6 files changed, 506 insertions(+), 153 deletions(-) create mode 100644 releasenotes/notes/share-pci-between-numa-nodes-0bd206eeca4ebcde.yaml diff --git a/nova/conf/pci.py b/nova/conf/pci.py index cc417818e99..f8464b48adc 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -28,28 +28,40 @@ help=""" An alias for a PCI passthrough device requirement. -This allows users to specify the alias in the extra_spec for a flavor, without +This allows users to specify the alias in the extra specs for a flavor, without needing to repeat all the PCI property requirements. Possible Values: -* A list of JSON values which describe the aliases. For example: +* A list of JSON values which describe the aliases. 
For example:: alias = { "name": "QuickAssist", "product_id": "0443", "vendor_id": "8086", - "device_type": "type-PCI" + "device_type": "type-PCI", + "numa_policy": "required" } - defines an alias for the Intel QuickAssist card. (multi valued). Valid key - values are : + This defines an alias for the Intel QuickAssist card. (multi valued). Valid + key values are : - * "name": Name of the PCI alias. - * "product_id": Product ID of the device in hexadecimal. - * "vendor_id": Vendor ID of the device in hexadecimal. - * "device_type": Type of PCI device. Valid values are: "type-PCI", - "type-PF" and "type-VF". + ``name`` + Name of the PCI alias. + + ``product_id`` + Product ID of the device in hexadecimal. + + ``vendor_id`` + Vendor ID of the device in hexadecimal. + + ``device_type`` + Type of PCI device. Valid values are: ``type-PCI``, ``type-PF`` and + ``type-VF``. + + ``numa_policy`` + Required NUMA affinity of device. Valid values are: ``legacy``, + ``preferred`` and ``required``. """), cfg.MultiStrOpt('passthrough_whitelist', default=[], diff --git a/nova/pci/request.py b/nova/pci/request.py index 84465982fe0..d37fcc6a360 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -21,9 +21,10 @@ | "product_id": "0443", | "vendor_id": "8086", | "device_type": "type-PCI", + | "numa_policy": "legacy" | }' - Aliases with the same name and the same device_type are ORed:: + Aliases with the same name, device_type and numa_policy are ORed:: | [pci] | alias = '{ @@ -35,11 +36,8 @@ These two aliases define a device request meaning: vendor_id is "8086" and product_id is "0442" or "0443". 
- """ -import copy - import jsonschema from oslo_serialization import jsonutils import six @@ -59,13 +57,8 @@ network_model.VNIC_TYPE_DIRECT_PHYSICAL: obj_fields.PciDeviceType.SRIOV_PF } - CONF = nova.conf.CONF - -_ALIAS_DEV_TYPE = [obj_fields.PciDeviceType.STANDARD, - obj_fields.PciDeviceType.SRIOV_PF, - obj_fields.PciDeviceType.SRIOV_VF] _ALIAS_CAP_TYPE = ['pci'] _ALIAS_SCHEMA = { "type": "object", @@ -76,6 +69,8 @@ "minLength": 1, "maxLength": 256, }, + # TODO(stephenfin): This isn't used anywhere outside of tests and + # should probably be removed. "capability_type": { "type": "string", "enum": _ALIAS_CAP_TYPE, @@ -90,7 +85,11 @@ }, "device_type": { "type": "string", - "enum": _ALIAS_DEV_TYPE, + "enum": list(obj_fields.PciDeviceType.ALL), + }, + "numa_policy": { + "type": "string", + "enum": list(obj_fields.PCINUMAAffinityPolicy.ALL), }, }, "required": ["name"], @@ -98,92 +97,124 @@ def _get_alias_from_config(): - """Parse and validate PCI aliases from the nova config.""" + """Parse and validate PCI aliases from the nova config. + + :returns: A dictionary where the keys are device names and the values are + tuples of form ``(specs, numa_policy)``. ``specs`` is a list of PCI + device specs, while ``numa_policy`` describes the required NUMA + affinity of the device(s). + :raises: exception.PciInvalidAlias if two aliases with the same name have + different device types or different NUMA policies. + """ jaliases = CONF.pci.alias aliases = {} # map alias name to alias spec list try: for jsonspecs in jaliases: spec = jsonutils.loads(jsonspecs) jsonschema.validate(spec, _ALIAS_SCHEMA) - # It should keep consistent behaviour in configuration - # and extra specs to call strip() function. 
- name = spec.pop("name").strip() + + name = spec.pop('name').strip() + numa_policy = spec.pop('numa_policy', None) + if not numa_policy: + numa_policy = obj_fields.PCINUMAAffinityPolicy.LEGACY + dev_type = spec.pop('device_type', None) if dev_type: spec['dev_type'] = dev_type + if name not in aliases: - aliases[name] = [spec] - else: - if aliases[name][0]["dev_type"] == spec["dev_type"]: - aliases[name].append(spec) - else: - reason = _("Device type mismatch for alias '%s'") % name - raise exception.PciInvalidAlias(reason=reason) + aliases[name] = (numa_policy, [spec]) + continue + if aliases[name][0] != numa_policy: + reason = _("NUMA policy mismatch for alias '%s'") % name + raise exception.PciInvalidAlias(reason=reason) + + if aliases[name][1][0]['dev_type'] != spec['dev_type']: + reason = _("Device type mismatch for alias '%s'") % name + raise exception.PciInvalidAlias(reason=reason) + + aliases[name][1].append(spec) except exception.PciInvalidAlias: raise - except Exception as e: - raise exception.PciInvalidAlias(reason=six.text_type(e)) + except jsonschema.exceptions.ValidationError as exc: + raise exception.PciInvalidAlias(reason=exc.message) + except Exception as exc: + raise exception.PciInvalidAlias(reason=six.text_type(exc)) return aliases def _translate_alias_to_requests(alias_spec): """Generate complete pci requests from pci aliases in extra_spec.""" - pci_aliases = _get_alias_from_config() - pci_requests = [] # list of a specs dict + pci_requests = [] for name, count in [spec.split(':') for spec in alias_spec.split(',')]: name = name.strip() if name not in pci_aliases: raise exception.PciRequestAliasNotDefined(alias=name) - else: - request = objects.InstancePCIRequest( - count=int(count), - spec=copy.deepcopy(pci_aliases[name]), - alias_name=name) - pci_requests.append(request) + + count = int(count) + numa_policy, spec = pci_aliases[name] + + pci_requests.append(objects.InstancePCIRequest( + count=count, + spec=spec, + alias_name=name, + 
numa_policy=numa_policy)) return pci_requests def get_pci_requests_from_flavor(flavor): - """Get flavor's pci request. + """Validate and return PCI requests. + + The ``pci_passthrough:alias`` extra spec describes the flavor's PCI + requests. The extra spec's value is a comma-separated list of format + ``alias_name_x:count, alias_name_y:count, ... ``, where ``alias_name`` is + defined in ``pci.alias`` configurations. - The pci_passthrough:alias scope in flavor extra_specs - describes the flavor's pci requests, the key is - 'pci_passthrough:alias' and the value has format - 'alias_name_x:count, alias_name_y:count, ... '. The alias_name is - defined in 'pci.alias' configurations. + The flavor's requirement is translated into a PCI requests list. Each + entry in the list is an instance of nova.objects.InstancePCIRequests with + four keys/attributes. - The flavor's requirement is translated into a pci requests list. - Each entry in the list is a dict-ish object with three keys/attributes. - The 'specs' gives the pci device properties requirement, the 'count' gives - the number of devices, and the optional 'alias_name' is the corresponding - alias definition name. + - 'spec' states the PCI device properties requirement + - 'count' states the number of devices + - 'alias_name' (optional) is the corresponding alias definition name + - 'numa_policy' (optional) states the required NUMA affinity of the devices - Example: - Assume alias configuration is:: + For example, assume alias configuration is:: - | {'vendor_id':'8086', - | 'device_id':'1502', - | 'name':'alias_1'} + { + 'vendor_id':'8086', + 'device_id':'1502', + 'name':'alias_1' + } - The flavor extra specs includes: 'pci_passthrough:alias': 'alias_1:2'. 
+ While flavor extra specs includes:: - The returned pci_requests are:: + 'pci_passthrough:alias': 'alias_1:2' - | pci_requests = [{'count':2, - | 'specs': [{'vendor_id':'8086', - | 'device_id':'1502'}], - | 'alias_name': 'alias_1'}] + The returned ``pci_requests`` are:: - :param flavor: the flavor to be checked - :returns: a list of pci requests + [{ + 'count':2, + 'specs': [{'vendor_id':'8086', 'device_id':'1502'}], + 'alias_name': 'alias_1' + }] + + :param flavor: The flavor to be checked + :returns: A list of PCI requests + :rtype: nova.objects.InstancePCIRequests + :raises: exception.PciRequestAliasNotDefined if an invalid PCI alias is + provided + :raises: exception.PciInvalidAlias if the configuration contains invalid + aliases. """ pci_requests = [] if ('extra_specs' in flavor and 'pci_passthrough:alias' in flavor['extra_specs']): pci_requests = _translate_alias_to_requests( flavor['extra_specs']['pci_passthrough:alias']) + return objects.InstancePCIRequests(requests=pci_requests) diff --git a/nova/pci/stats.py b/nova/pci/stats.py index 7754c068a97..4e7bc589966 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -151,7 +151,11 @@ def consume_requests(self, pci_requests, numa_cells=None): # a spec may be able to match multiple pools. 
pools = self._filter_pools_for_spec(self.pools, spec) if numa_cells: - pools = self._filter_pools_for_numa_cells(pools, numa_cells) + numa_policy = None + if 'numa_policy' in request: + numa_policy = request.numa_policy + pools = self._filter_pools_for_numa_cells( + pools, numa_cells, numa_policy, count) pools = self._filter_non_requested_pfs(request, pools) # Failed to allocate the required number of devices # Return the devices already allocated back to their pools @@ -209,17 +213,68 @@ def _filter_pools_for_spec(pools, request_specs): return [pool for pool in pools if utils.pci_device_prop_match(pool, request_specs)] - @staticmethod - def _filter_pools_for_numa_cells(pools, numa_cells): - # Some systems don't report numa node info for pci devices, in - # that case None is reported in pci_device.numa_node, by adding None - # to numa_cells we allow assigning those devices to instances with - # numa topology - numa_cells = [None] + [cell.id for cell in numa_cells] - # filter out pools which numa_node is not included in numa_cells - return [pool for pool in pools if any(utils.pci_device_prop_match( - pool, [{'numa_node': cell}]) - for cell in numa_cells)] + @classmethod + def _filter_pools_for_numa_cells(cls, pools, numa_cells, numa_policy, + requested_count): + """Filter out pools with the wrong NUMA affinity, if required. + + Exclude pools that do not have *suitable* PCI NUMA affinity. + ``numa_policy`` determines what *suitable* means, being one of + PREFERRED (nice-to-have), LEGACY (must-have-if-available) and REQUIRED + (must-have). We iterate through the various policies in order of + strictness. This means that even if we only *prefer* PCI-NUMA affinity, + we will still attempt to provide it if possible. + + :param pools: A list of PCI device pool dicts + :param numa_cells: A list of InstanceNUMACell objects whose ``id`` + corresponds to the ``id`` of host NUMACells. + :param numa_policy: The PCI NUMA affinity policy to apply. 
+ :param requested_count: The number of PCI devices requested. + :returns: A list of pools that can, together, provide at least + ``requested_count`` PCI devices with the level of NUMA affinity + required by ``numa_policy``. + """ + # NOTE(stephenfin): We may wish to change the default policy at a later + # date + requested_policy = numa_policy or fields.PCINUMAAffinityPolicy.LEGACY + numa_cell_ids = [cell.id for cell in numa_cells] + + # filter out pools which numa_node is not included in numa_cell_ids + filtered_pools = [ + pool for pool in pools if any(utils.pci_device_prop_match( + pool, [{'numa_node': cell}]) for cell in numa_cell_ids)] + + # we can't apply a less strict policy than the one requested, so we + # need to return if we've demanded a NUMA affinity of REQUIRED. + # However, NUMA affinity is a good thing. If we can get enough devices + # with the stricter policy then we will use them. + if requested_policy == fields.PCINUMAAffinityPolicy.REQUIRED or sum( + pool['count'] for pool in filtered_pools) >= requested_count: + return filtered_pools + + # some systems don't report NUMA node info for PCI devices, in which + # case None is reported in 'pci_device.numa_node'. The LEGACY policy + # allows us to use these devices so we include None in the list of + # suitable NUMA cells. + numa_cell_ids.append(None) + + # filter out pools which numa_node is not included in numa_cell_ids + filtered_pools = [ + pool for pool in pools if any(utils.pci_device_prop_match( + pool, [{'numa_node': cell}]) for cell in numa_cell_ids)] + + # once again, we can't apply a less strict policy than the one + # requested, so we need to return if we've demanded a NUMA affinity of + # LEGACY. Similarly, we will also return if we have enough devices to + # satisfy this somewhat strict policy. 
+ if requested_policy == fields.PCINUMAAffinityPolicy.LEGACY or sum( + pool['count'] for pool in filtered_pools) >= requested_count: + return filtered_pools + + # if we've got here, we're using the PREFERRED policy and weren't able + # to provide anything with stricter affinity. Use whatever devices you + # can, folks. + return pools def _filter_non_requested_pfs(self, request, matching_pools): # Remove SRIOV_PFs from pools, unless it has been explicitly requested @@ -236,16 +291,46 @@ def _filter_pools_for_pfs(pools): if not pool.get('dev_type') == fields.PciDeviceType.SRIOV_PF] def _apply_request(self, pools, request, numa_cells=None): + """Apply a PCI request. + + Apply a PCI request against a given set of PCI device pools, which are + collections of devices with similar traits. + + If ``numa_cells`` is provided then NUMA locality may be taken into + account, depending on the value of ``request.numa_policy``. + + :param pools: A list of PCI device pool dicts + :param request: An InstancePCIRequest object describing the type, + quantity and required NUMA affinity of device(s) we want. + :param numa_cells: A list of InstanceNUMACell objects whose ``id`` + corresponds to the ``id`` of host NUMACells. + """ # NOTE(vladikr): This code maybe open to race conditions. # Two concurrent requests may succeed when called support_requests # because this method does not remove related devices from the pools count = request.count + + # Firstly, let's exclude all devices that don't match our spec (e.g. 
+ # they've got different PCI IDs or something) matching_pools = self._filter_pools_for_spec(pools, request.spec) + + # Next, let's exclude all devices that aren't on the correct NUMA node + # *assuming* we have devices and care about that, as determined by + # policy if numa_cells: + numa_policy = None + if 'numa_policy' in request: + numa_policy = request.numa_policy + matching_pools = self._filter_pools_for_numa_cells(matching_pools, - numa_cells) + numa_cells, numa_policy, count) + + # Finally, if we're not requesting PFs then we should not use these. + # Exclude them. matching_pools = self._filter_non_requested_pfs(request, matching_pools) + + # Do we still have any devices left? if sum([pool['count'] for pool in matching_pools]) < count: return False else: @@ -256,30 +341,47 @@ def _apply_request(self, pools, request, numa_cells=None): return True def support_requests(self, requests, numa_cells=None): - """Check if the pci requests can be met. + """Determine if the PCI requests can be met. - Scheduler checks compute node's PCI stats to decide if an - instance can be scheduled into the node. Support does not - mean real allocation. - If numa_cells is provided then only devices contained in - those nodes are considered. + Determine, based on a compute node's PCI stats, if an instance can be + scheduled on the node. **Support does not mean real allocation**. + + If ``numa_cells`` is provided then NUMA locality may be taken into + account, depending on the value of ``numa_policy``. + + :param requests: A list of InstancePCIRequest object describing the + types, quantities and required NUMA affinities of devices we want. + :type requests: nova.objects.InstancePCIRequests + :param numa_cells: A list of InstanceNUMACell objects whose ``id`` + corresponds to the ``id`` of host NUMACells, or None. + :returns: Whether this compute node can satisfy the given request. 
""" # note (yjiang5): this function has high possibility to fail, # so no exception should be triggered for performance reason. pools = copy.deepcopy(self.pools) - return all([self._apply_request(pools, r, numa_cells) - for r in requests]) + return all(self._apply_request(pools, r, numa_cells) for r in requests) def apply_requests(self, requests, numa_cells=None): """Apply PCI requests to the PCI stats. This is used in multiple instance creation, when the scheduler has to maintain how the resources are consumed by the instances. - If numa_cells is provided then only devices contained in - those nodes are considered. + + If ``numa_cells`` is provided then NUMA locality may be taken into + account, depending on the value of ``numa_policy``. + + :param requests: A list of InstancePCIRequest object describing the + types, quantities and required NUMA affinities of devices we want. + :type requests: nova.objects.InstancePCIRequests + :param numa_cells: A list of InstanceNUMACell objects whose ``id`` + corresponds to the ``id`` of host NUMACells, or None. + :param numa_policy: The PCI NUMA affinity policy to apply when + filtering devices from ``numa_cells``, or None. + :raises: exception.PciDeviceRequestFailed if this compute node cannot + satisfy the given request. 
""" - if not all([self._apply_request(self.pools, r, numa_cells) - for r in requests]): + if not all(self._apply_request(self.pools, r, numa_cells) + for r in requests): raise exception.PciDeviceRequestFailed(requests=requests) def __iter__(self): diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index 1460e7dc16b..7ac20859c8b 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -25,7 +25,8 @@ "capability_type": "pci", "product_id": "4443", "vendor_id": "8086", - "device_type": "type-PCI" + "device_type": "type-PCI", + "numa_policy": "legacy" }""" _fake_alias11 = """{ @@ -62,44 +63,44 @@ class AliasTestCase(test.NoDBTestCase): - def test_good_alias(self): + + def test_valid_alias(self): self.flags(alias=[_fake_alias1], group='pci') - als = request._get_alias_from_config() - self.assertIsInstance(als['QuicAssist'], list) - expect_dict = { - "capability_type": "pci", - "product_id": "4443", - "vendor_id": "8086", - "dev_type": "type-PCI" - } - self.assertEqual(expect_dict, als['QuicAssist'][0]) - - def test_multispec_alias(self): + result = request._get_alias_from_config() + expected_result = ( + 'legacy', + [{ + "capability_type": "pci", + "product_id": "4443", + "vendor_id": "8086", + "dev_type": "type-PCI", + }]) + self.assertEqual(expected_result, result['QuicAssist']) + + def test_valid_multispec_alias(self): self.flags(alias=[_fake_alias1, _fake_alias11], group='pci') - als = request._get_alias_from_config() - self.assertIsInstance(als['QuicAssist'], list) - expect_dict1 = { - "capability_type": "pci", - "product_id": "4443", - "vendor_id": "8086", - "dev_type": "type-PCI" - } - expect_dict2 = { - "capability_type": "pci", - "product_id": "4444", - "vendor_id": "8086", - "dev_type": "type-PCI" - } - - self.assertEqual(expect_dict1, als['QuicAssist'][0]) - self.assertEqual(expect_dict2, als['QuicAssist'][1]) - - def test_wrong_type_aliase(self): + result = 
request._get_alias_from_config() + expected_result = ( + 'legacy', + [{ + "capability_type": "pci", + "product_id": "4443", + "vendor_id": "8086", + "dev_type": "type-PCI" + }, { + "capability_type": "pci", + "product_id": "4444", + "vendor_id": "8086", + "dev_type": "type-PCI" + }]) + self.assertEqual(expected_result, result['QuicAssist']) + + def test_invalid_type_alias(self): self.flags(alias=[_fake_alias2], group='pci') self.assertRaises(exception.PciInvalidAlias, request._get_alias_from_config) - def test_wrong_product_id_aliase(self): + def test_invalid_product_id_alias(self): self.flags(alias=[ """{ "name": "xxx", @@ -112,7 +113,7 @@ def test_wrong_product_id_aliase(self): self.assertRaises(exception.PciInvalidAlias, request._get_alias_from_config) - def test_wrong_vendor_id_aliase(self): + def test_invalid_vendor_id_alias(self): self.flags(alias=[ """{ "name": "xxx", @@ -125,7 +126,7 @@ def test_wrong_vendor_id_aliase(self): self.assertRaises(exception.PciInvalidAlias, request._get_alias_from_config) - def test_wrong_cap_type_aliase(self): + def test_invalid_cap_type_alias(self): self.flags(alias=[ """{ "name": "xxx", @@ -138,7 +139,22 @@ def test_wrong_cap_type_aliase(self): self.assertRaises(exception.PciInvalidAlias, request._get_alias_from_config) - def test_dup_aliase(self): + def test_invalid_numa_policy(self): + self.flags(alias=[ + """{ + "name": "xxx", + "capability_type": "pci", + "product_id": "1111", + "vendor_id": "8086", + "device_type": "NIC", + "numa_policy": "derp" + }"""], + group='pci') + self.assertRaises(exception.PciInvalidAlias, + request._get_alias_from_config) + + def test_conflicting_device_type(self): + """Check behavior when device_type conflicts occur.""" self.flags(alias=[ """{ "name": "xxx", @@ -159,6 +175,28 @@ def test_dup_aliase(self): exception.PciInvalidAlias, request._get_alias_from_config) + def test_conflicting_numa_policy(self): + """Check behavior when numa_policy conflicts occur.""" + self.flags(alias=[ + """{ + 
"name": "xxx", + "capability_type": "pci", + "product_id": "1111", + "vendor_id": "8086", + "numa_policy": "required", + }""", + """{ + "name": "xxx", + "capability_type": "pci", + "product_id": "1111", + "vendor_id": "8086", + "numa_policy": "legacy", + }"""], + group='pci') + self.assertRaises( + exception.PciInvalidAlias, + request._get_alias_from_config) + def _verify_result(self, expected, real): exp_real = zip(expected, real) for exp, real in exp_real: @@ -166,7 +204,7 @@ def _verify_result(self, expected, real): self.assertEqual(exp['alias_name'], real.alias_name) self.assertEqual(exp['spec'], real.spec) - def test_aliase_2_request(self): + def test_alias_2_request(self): self.flags(alias=[_fake_alias1, _fake_alias3], group='pci') expect_request = [ {'count': 3, @@ -186,7 +224,7 @@ def test_aliase_2_request(self): self.assertEqual(set([p['count'] for p in requests]), set([1, 3])) self._verify_result(expect_request, requests) - def test_aliase_2_request_invalid(self): + def test_alias_2_request_invalid(self): self.flags(alias=[_fake_alias1, _fake_alias3], group='pci') self.assertRaises(exception.PciRequestAliasNotDefined, request._translate_alias_to_requests, diff --git a/nova/tests/unit/pci/test_stats.py b/nova/tests/unit/pci/test_stats.py index 31795d9c863..c492de8f624 100644 --- a/nova/tests/unit/pci/test_stats.py +++ b/nova/tests/unit/pci/test_stats.py @@ -65,6 +65,17 @@ class PciDeviceStatsTestCase(test.NoDBTestCase): + + @staticmethod + def _get_fake_requests(vendor_ids=None, numa_policy=None): + if not vendor_ids: + vendor_ids = ['v1', 'v2'] + + specs = [{'vendor_id': vendor_id} for vendor_id in vendor_ids] + + return [objects.InstancePCIRequest(count=1, spec=[spec], + numa_policy=numa_policy) for spec in specs] + def _create_fake_devs(self): self.fake_dev_1 = objects.PciDevice.create(None, fake_pci_1) self.fake_dev_2 = objects.PciDevice.create(None, fake_pci_2) @@ -75,6 +86,15 @@ def _create_fake_devs(self): self.fake_dev_3, self.fake_dev_4]: 
self.pci_stats.add_device(dev) + def _add_fake_devs_with_numa(self): + fake_pci = dict(fake_pci_1, vendor_id='v4', product_id='pr4') + devs = [dict(fake_pci, product_id='pr0', numa_node=0, ), + dict(fake_pci, product_id='pr1', numa_node=1), + dict(fake_pci, product_id='pr_none', numa_node=None)] + + for dev in devs: + self.pci_stats.add_device(objects.PciDevice.create(None, dev)) + def setUp(self): super(PciDeviceStatsTestCase, self).setUp() self.pci_stats = stats.PciDeviceStats() @@ -129,6 +149,17 @@ def test_object_create(self): self.assertEqual(set([d['vendor_id'] for d in new_stats]), set(['v1', 'v2', 'v3'])) + def test_apply_requests(self): + self.pci_stats.apply_requests(pci_requests) + self.assertEqual(len(self.pci_stats.pools), 2) + self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1') + self.assertEqual(self.pci_stats.pools[0]['count'], 1) + + def test_apply_requests_failed(self): + self.assertRaises(exception.PciDeviceRequestFailed, + self.pci_stats.apply_requests, + pci_requests_multiple) + def test_support_requests(self): self.assertTrue(self.pci_stats.support_requests(pci_requests)) self.assertEqual(len(self.pci_stats.pools), 3) @@ -142,16 +173,44 @@ def test_support_requests_failed(self): self.assertEqual(set([d['count'] for d in self.pci_stats]), set([1, 2])) - def test_apply_requests(self): - self.pci_stats.apply_requests(pci_requests) - self.assertEqual(len(self.pci_stats.pools), 2) - self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1') - self.assertEqual(self.pci_stats.pools[0]['count'], 1) + def test_support_requests_numa(self): + cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0), + objects.InstanceNUMACell(id=1, cpuset=set(), memory=0)] + self.assertTrue(self.pci_stats.support_requests(pci_requests, cells)) - def test_apply_requests_failed(self): - self.assertRaises(exception.PciDeviceRequestFailed, - self.pci_stats.apply_requests, - pci_requests_multiple) + def test_support_requests_numa_failed(self): + cells = 
[objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] + self.assertFalse(self.pci_stats.support_requests(pci_requests, cells)) + + def test_support_requests_no_numa_info(self): + cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] + pci_requests = self._get_fake_requests(vendor_ids=['v3']) + self.assertTrue(self.pci_stats.support_requests(pci_requests, cells)) + + # 'legacy' is the default numa_policy so the result must be same + pci_requests = self._get_fake_requests(vendor_ids=['v3'], + numa_policy = fields.PCINUMAAffinityPolicy.LEGACY) + self.assertTrue(self.pci_stats.support_requests(pci_requests, cells)) + + def test_support_requests_numa_pci_numa_policy_preferred(self): + # numa node 0 has 2 devices with vendor_id 'v1' + # numa node 1 has 1 device with vendor_id 'v2' + # we request two devices with vendor_id 'v1' and 'v2'. + # pci_numa_policy is 'preferred' so we can ignore numa affinity + cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] + pci_requests = self._get_fake_requests( + numa_policy=fields.PCINUMAAffinityPolicy.PREFERRED) + + self.assertTrue(self.pci_stats.support_requests(pci_requests, cells)) + + def test_support_requests_no_numa_info_pci_numa_policy_required(self): + # pci device with vendor_id 'v3' has numa_node=None. 
+ # pci_numa_policy is 'required' so we can't use this device + cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] + pci_requests = self._get_fake_requests(vendor_ids=['v3'], + numa_policy=fields.PCINUMAAffinityPolicy.REQUIRED) + + self.assertFalse(self.pci_stats.support_requests(pci_requests, cells)) def test_consume_requests(self): devs = self.pci_stats.consume_requests(pci_requests) @@ -167,21 +226,6 @@ def test_consume_requests_failed(self): self.assertIsNone(self.pci_stats.consume_requests( pci_requests_multiple)) - def test_support_requests_numa(self): - cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0), - objects.InstanceNUMACell(id=1, cpuset=set(), memory=0)] - self.assertTrue(self.pci_stats.support_requests(pci_requests, cells)) - - def test_support_requests_numa_failed(self): - cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] - self.assertFalse(self.pci_stats.support_requests(pci_requests, cells)) - - def test_support_requests_no_numa_info(self): - cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0)] - pci_request = [objects.InstancePCIRequest(count=1, - spec=[{'vendor_id': 'v3'}])] - self.assertTrue(self.pci_stats.support_requests(pci_request, cells)) - def test_consume_requests_numa(self): cells = [objects.InstanceNUMACell(id=0, cpuset=set(), memory=0), objects.InstanceNUMACell(id=1, cpuset=set(), memory=0)] @@ -203,6 +247,115 @@ def test_consume_requests_no_numa_info(self): self.assertEqual(set(['v3']), set([dev.vendor_id for dev in devs])) + def _test_consume_requests_numa_policy(self, cell_ids, policy, + expected, vendor_id='v4'): + """Base test for 'consume_requests' function. + + Create three devices with vendor_id of 'v4': 'pr0' in NUMA node 0, + 'pr1' in NUMA node 1, 'pr_none' without NUMA affinity info. Attempt to + consume a PCI request with a single device with ``vendor_id`` using the + provided ``cell_ids`` and ``policy``. Compare result against + ``expected``. 
+ """ + self._add_fake_devs_with_numa() + cells = [objects.InstanceNUMACell(id=id, cpuset=set(), memory=0) + for id in cell_ids] + + pci_requests = self._get_fake_requests(vendor_ids=[vendor_id], + numa_policy=policy) + devs = self.pci_stats.consume_requests(pci_requests, cells) + + if expected is None: + self.assertIsNone(devs) + else: + self.assertEqual(set(expected), + set([dev.product_id for dev in devs])) + + def test_consume_requests_numa_policy_required(self): + """Ensure REQUIRED policy will ensure NUMA affinity. + + Policy is 'required' which means we must use a device with strict NUMA + affinity. Request a device from NUMA node 0, which contains such a + device, and ensure it's used. + """ + self._test_consume_requests_numa_policy( + [0], fields.PCINUMAAffinityPolicy.REQUIRED, ['pr0']) + + def test_consume_requests_numa_policy_required_fail(self): + """Ensure REQUIRED policy will *only* provide NUMA affinity. + + Policy is 'required' which means we must use a device with strict NUMA + affinity. Request a device from NUMA node 999, which does not contain + any suitable devices, and ensure nothing is returned. + """ + self._test_consume_requests_numa_policy( + [999], fields.PCINUMAAffinityPolicy.REQUIRED, None) + + def test_consume_requests_numa_policy_legacy(self): + """Ensure LEGACY policy will ensure NUMA affinity if possible. + + Policy is 'legacy' which means we must use a device with strict NUMA + affinity or no provided NUMA affinity. Request a device from NUMA node + 0, which contains contains such a device, and ensure it's used. + """ + self._test_consume_requests_numa_policy( + [0], fields.PCINUMAAffinityPolicy.LEGACY, ['pr0']) + + def test_consume_requests_numa_policy_legacy_fallback(self): + """Ensure LEGACY policy will fallback to no NUMA affinity. + + Policy is 'legacy' which means we must use a device with strict NUMA + affinity or no provided NUMA affinity. 
Request a device from NUMA node + 999, which contains no such device, and ensure we fallback to the + device without any NUMA affinity. + """ + self._test_consume_requests_numa_policy( + [999], fields.PCINUMAAffinityPolicy.LEGACY, ['pr_none']) + + def test_consume_requests_numa_policy_legacy_fail(self): + """Ensure LEGACY policy will *not* provide NUMA non-affinity. + + Policy is 'legacy' which means we must use a device with strict NUMA + affinity or no provided NUMA affinity. Request a device with + ``vendor_id`` of ``v2``, which can only be found in NUMA node 1, from + NUMA node 0, and ensure nothing is returned. + """ + self._test_consume_requests_numa_policy( + [0], fields.PCINUMAAffinityPolicy.LEGACY, None, vendor_id='v2') + + def test_consume_requests_numa_pci_numa_policy_preferred(self): + """Ensure PREFERRED policy will ensure NUMA affinity if possible. + + Policy is 'preferred' which means we must use a device with any level + of NUMA affinity. Request a device from NUMA node 0, which + contains an affined device, and ensure it's used. + """ + self._test_consume_requests_numa_policy( + [0], fields.PCINUMAAffinityPolicy.PREFERRED, ['pr0']) + + def test_consume_requests_numa_policy_preferred_fallback_a(self): + """Ensure PREFERRED policy will fallback to no NUMA affinity. + + Policy is 'preferred' which means we must use a device with any level + of NUMA affinity. Request a device from NUMA node 999, which contains + no such device, and ensure we fallback to the device without any NUMA + affinity. + """ + self._test_consume_requests_numa_policy( + [999], fields.PCINUMAAffinityPolicy.PREFERRED, ['pr_none']) + + def test_consume_requests_numa_pci_numa_policy_fallback_b(self): + """Ensure PREFERRED policy will fallback to different NUMA affinity. + + Policy is 'preferred' which means we must use a device with any level + of NUMA affinity. 
Request a device with ``vendor_id`` of ``v2``, which + can only be found in NUMA node 1, from NUMA node 0, and ensure we + fallback to this device. + """ + self._test_consume_requests_numa_policy( + [0], fields.PCINUMAAffinityPolicy.PREFERRED, ['p2'], + vendor_id='v2') + @mock.patch( 'nova.pci.whitelist.Whitelist._parse_white_list_from_config') def test_white_list_parsing(self, mock_whitelist_parse): @@ -289,7 +442,7 @@ def test_add_devices(self): self._create_pci_devices() self._assertPools() - def test_consume_reqeusts(self): + def test_consume_requests(self): self._create_pci_devices() pci_requests = [objects.InstancePCIRequest(count=1, spec=[{'physical_network': 'physnet1'}]), diff --git a/releasenotes/notes/share-pci-between-numa-nodes-0bd206eeca4ebcde.yaml b/releasenotes/notes/share-pci-between-numa-nodes-0bd206eeca4ebcde.yaml new file mode 100644 index 00000000000..f31715e4e2d --- /dev/null +++ b/releasenotes/notes/share-pci-between-numa-nodes-0bd206eeca4ebcde.yaml @@ -0,0 +1,17 @@ +--- +features: + - | + Added support for PCI device NUMA affinity policies. These allow you to + configure how strict your NUMA affinity should be on a per-device basis or, + more specifically, per device alias. These are configured as part of the + ``[pci] aliases`` configuration option. + + There are three policies supported: + + - ``required`` (must-have) + - ``legacy`` (must-have-if-available) (default) + - ``preferred`` (nice-to-have) + + In all cases, strict NUMA affinity is provided if possible. The key + difference between the policies is how much NUMA affinity one is willing to + forsake before failing to schedule.