Skip to content

Commit

Permalink
Fix reporting inventory for provisioned nodes in the Ironic driver
Browse files Browse the repository at this point in the history
Currently we report the full inventory for available nodes, and an empty
inventory for nodes that are deployed to or otherwise unavailable.

Reporting an empty inventory for deployed nodes has 2 bad consequences:
1. Nova tries deleting the inventory for Placement, which fails, because
   the resources are still in use. This results in nasty warnings.
2. When adding a resource class to a deployed node, it does not get into
   inventory, and thus does not get to Placement. It results in an error
   later on, when the custom resource class is not found.

This patch fixes the latter problem by
1. Always reporting the custom resource class for deployed nodes, if present.
2. Reporting VCPUS/memory/disk in exactly the same amount, as it is configured
   in the ironic node's properties.

As a side effect, the warnings are no longer shown for deployed nodes.
They still appear, however, for nodes during cleaning.

Partial-Bug: #1710141
Change-Id: I2fd1e4a95f000da19864e75299afa51527697101
  • Loading branch information
dtantsur committed Aug 15, 2017
1 parent 4cd6a3a commit 9ed692b
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 48 deletions.
113 changes: 78 additions & 35 deletions nova/tests/unit/virt/ironic/test_driver.py
Expand Up @@ -256,17 +256,12 @@ def test__wait_for_power_state_ok(self, fake_validate):
self.driver._wait_for_power_state, instance, 'fake message')
self.assertTrue(fake_validate.called)

def _test__node_resource(self, has_inst_info):
def test__node_resource_with_instance_uuid(self):
node_uuid = uuidutils.generate_uuid()
props = _get_properties()
stats = _get_stats()
if has_inst_info:
instance_info = _get_instance_info()
else:
instance_info = {}
node = ironic_utils.get_test_node(uuid=node_uuid,
instance_uuid=self.instance_uuid,
instance_info=instance_info,
properties=props,
resource_class='foo')

Expand All @@ -285,30 +280,18 @@ def _test__node_resource(self, has_inst_info):
gotkeys = sorted(result.keys())
self.assertEqual(wantkeys, gotkeys)

if has_inst_info:
props_dict = instance_info
expected_cpus = instance_info['vcpus']
else:
props_dict = props
expected_cpus = props['cpus']
self.assertEqual(0, result['vcpus'])
self.assertEqual(expected_cpus, result['vcpus_used'])
self.assertEqual(0, result['memory_mb'])
self.assertEqual(props_dict['memory_mb'], result['memory_mb_used'])
self.assertEqual(0, result['local_gb'])
self.assertEqual(props_dict['local_gb'], result['local_gb_used'])
self.assertEqual(props['cpus'], result['vcpus'])
self.assertEqual(result['vcpus'], result['vcpus_used'])
self.assertEqual(props['memory_mb'], result['memory_mb'])
self.assertEqual(result['memory_mb'], result['memory_mb_used'])
self.assertEqual(props['local_gb'], result['local_gb'])
self.assertEqual(result['local_gb'], result['local_gb_used'])

self.assertEqual(node_uuid, result['hypervisor_hostname'])
self.assertEqual(stats, result['stats'])
self.assertEqual('foo', result['resource_class'])
self.assertIsNone(result['numa_topology'])

def test__node_resource(self):
self._test__node_resource(True)

def test__node_resource_no_instance_info(self):
self._test__node_resource(False)

def test__node_resource_canonicalizes_arch(self):
node_uuid = uuidutils.generate_uuid()
props = _get_properties()
Expand Down Expand Up @@ -411,12 +394,12 @@ def test__node_resource_used_node_res(self, mock_res_used):
instance_info=instance_info)

result = self.driver._node_resource(node)
self.assertEqual(0, result['vcpus'])
self.assertEqual(instance_info['vcpus'], result['vcpus_used'])
self.assertEqual(0, result['memory_mb'])
self.assertEqual(instance_info['memory_mb'], result['memory_mb_used'])
self.assertEqual(0, result['local_gb'])
self.assertEqual(instance_info['local_gb'], result['local_gb_used'])
self.assertEqual(props['cpus'], result['vcpus'])
self.assertEqual(result['vcpus'], result['vcpus_used'])
self.assertEqual(props['memory_mb'], result['memory_mb'])
self.assertEqual(result['memory_mb'], result['memory_mb_used'])
self.assertEqual(props['local_gb'], result['local_gb'])
self.assertEqual(result['local_gb'], result['local_gb_used'])
self.assertEqual(node_uuid, result['hypervisor_hostname'])
self.assertEqual(stats, result['stats'])

Expand Down Expand Up @@ -683,11 +666,6 @@ def test__node_resources_unavailable(self):
{'uuid': uuidutils.generate_uuid(),
'power_state': ironic_states.POWER_ON,
'provision_state': ironic_states.DELETED},
# a node in AVAILABLE with an instance uuid
{'uuid': uuidutils.generate_uuid(),
'instance_uuid': uuidutils.generate_uuid(),
'power_state': ironic_states.POWER_OFF,
'provision_state': ironic_states.AVAILABLE}
]
for n in node_dicts:
node = ironic_utils.get_test_node(**n)
Expand Down Expand Up @@ -761,8 +739,11 @@ def test_get_inventory_no_rc(self, mock_nfc, mock_nr):
"""
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 0,
'memory_mb': 1024,
'memory_mb_used': 0,
'local_gb': 100,
'local_gb_used': 0,
'resource_class': None,
}

Expand Down Expand Up @@ -807,8 +788,67 @@ def test_get_inventory_with_rc(self, mock_nfc, mock_nr):
"""
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 0,
'memory_mb': 1024,
'memory_mb_used': 0,
'local_gb': 100,
'local_gb_used': 0,
'resource_class': 'iron-nfv',
}

result = self.driver.get_inventory(mock.sentinel.nodename)

expected = {
fields.ResourceClass.VCPU: {
'total': 24,
'reserved': 0,
'min_unit': 1,
'max_unit': 24,
'step_size': 1,
'allocation_ratio': 1.0,
},
fields.ResourceClass.MEMORY_MB: {
'total': 1024,
'reserved': 0,
'min_unit': 1,
'max_unit': 1024,
'step_size': 1,
'allocation_ratio': 1.0,
},
fields.ResourceClass.DISK_GB: {
'total': 100,
'reserved': 0,
'min_unit': 1,
'max_unit': 100,
'step_size': 1,
'allocation_ratio': 1.0,
},
'CUSTOM_IRON_NFV': {
'total': 1,
'reserved': 0,
'min_unit': 1,
'max_unit': 1,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
self.assertEqual(expected, result)

@mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
@mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
def test_get_inventory_with_rc_occupied(self, mock_nfc, mock_nr):
"""Ensure that when a node is used, we report the inventory matching
the consumed resources.
"""
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 24,
'memory_mb': 1024,
'memory_mb_used': 1024,
'local_gb': 100,
'local_gb_used': 100,
'resource_class': 'iron-nfv',
}

Expand Down Expand Up @@ -860,8 +900,11 @@ def test_get_inventory_disabled_node(self, mock_nfc, mock_nr):
"""
mock_nr.return_value = {
'vcpus': 0,
'vcpus_used': 0,
'memory_mb': 0,
'memory_mb_used': 0,
'local_gb': 0,
'local_gb_used': 0,
'resource_class': None,
}

Expand Down
24 changes: 11 additions & 13 deletions nova/virt/ironic/driver.py
Expand Up @@ -189,9 +189,7 @@ def _node_resources_unavailable(self, node_obj):
ironic_states.AVAILABLE, ironic_states.NOSTATE]
return (node_obj.maintenance or
node_obj.power_state in bad_power_states or
node_obj.provision_state not in good_provision_states or
(node_obj.provision_state in good_provision_states and
node_obj.instance_uuid is not None))
node_obj.provision_state not in good_provision_states)

def _node_resources_used(self, node_obj):
"""Determine whether the node's resources are currently used.
Expand Down Expand Up @@ -309,19 +307,13 @@ def _node_resource(self, node):
# Node is in the process of deploying, is deployed, or is in
# the process of cleaning up from a deploy. Report all of its
# resources as in use.
instance_info = self._parse_node_instance_info(node, properties)

# Use instance_info instead of properties here is because the
# properties of a deployed node can be changed which will count
# as available resources.
vcpus_used = vcpus = instance_info['vcpus']
memory_mb_used = memory_mb = instance_info['memory_mb']
local_gb_used = local_gb = instance_info['local_gb']

vcpus_used = vcpus
memory_mb_used = memory_mb
local_gb_used = local_gb
# Always checking allows us to catch the case where Nova thinks there
# are available resources on the Node, but Ironic does not (because it
# is not in a usable state): https://launchpad.net/bugs/1503453
if self._node_resources_unavailable(node):
elif self._node_resources_unavailable(node):
# The node's current state is such that it should not present any
# of its resources to Nova
vcpus = 0
Expand Down Expand Up @@ -743,6 +735,12 @@ def get_inventory(self, nodename):
# node is "disabled". In the future, we should detach inventory
# accounting from the concept of a node being disabled or not. The
# two things don't really have anything to do with each other.
LOG.debug('Node %(node)s is not ready for a deployment, '
'reporting an empty inventory for it. Node\'s '
'provision state is %(prov)s, power state is '
'%(power)s and maintenance is %(maint)s.',
{'node': node.uuid, 'prov': node.provision_state,
'power': node.power_state, 'maint': node.maintenance})
return {}

result = {
Expand Down
30 changes: 30 additions & 0 deletions releasenotes/notes/fix-ironic-inventory-d565c77af83c710d.yaml
@@ -0,0 +1,30 @@
---
fixes:
- |
The ironic virt driver no longer reports an empty inventory for bare metal
nodes that have instances on them. Instead the custom resource class, VCPU,
memory and disk are reported as they are configured on the node.
issues:
- |
Due to the changes in scheduling of bare metal nodes, additional resources
may be reported as free to Placement. This happens in two cases:
* An instance is deployed with a flavor smaller than a node (only possible
when exact filters are not used)
* Node properties were modified in ironic for a deployed nodes
When such instances were deployed without using a custom resource class,
it is possible for the scheduler to try deploying another instance on
the same node. It will cause a failure in the compute and a scheduling
retry.
The recommended work around is to assign a resource class to all ironic
nodes, and use it for scheduling of bare metal instances.
deprecations:
- |
Scheduling bare metal (ironic) instances using standard resource classes
(VCPU, memory, disk) is deprecated and will no longer be supported in
Queens. Custom resource classes should be used instead.
Please refer to the `ironic documentation
<https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html#scheduling-based-on-resource-classes>`_
for a detailed explanation.

0 comments on commit 9ed692b

Please sign in to comment.