diff --git a/doc/source/cli/nova-status.rst b/doc/source/cli/nova-status.rst index cd145f6ab8d..88cfbf66e40 100644 --- a/doc/source/cli/nova-status.rst +++ b/doc/source/cli/nova-status.rst @@ -104,6 +104,8 @@ Upgrade **17.0.0 (Queens)** * Checks for the Placement API are modified to require version 1.17. + * Checks that ironic instances have had their embedded flavors migrated to + use custom resource classes. This check was backported from Rocky. See Also ======== diff --git a/nova/cmd/status.py b/nova/cmd/status.py index a234662d3fb..d4c5cdddf6a 100644 --- a/nova/cmd/status.py +++ b/nova/cmd/status.py @@ -18,6 +18,7 @@ from __future__ import print_function +import collections # enum comes from the enum34 package if python < 3.4, else it's stdlib import enum import functools @@ -27,10 +28,11 @@ from keystoneauth1 import exceptions as ks_exc from oslo_config import cfg +from oslo_serialization import jsonutils import pkg_resources import prettytable from sqlalchemy import func as sqlfunc -from sqlalchemy import MetaData, Table, select +from sqlalchemy import MetaData, Table, and_, select from nova.cmd import common as cmd_common import nova.conf @@ -344,6 +346,100 @@ def _check_resource_providers(self): # We have RPs >= CNs which is what we want to see. return UpgradeCheckResult(UpgradeCheckCode.SUCCESS) + @staticmethod + def _is_ironic_instance_migrated(extras, inst): + extra = (extras.select().where(extras.c.instance_uuid == inst['uuid'] + ).execute().first()) + # Pull the flavor and deserialize it. Note that the flavor info for an + # instance is a dict keyed by "cur", "old", "new" and we want the + # current flavor. + flavor = jsonutils.loads(extra['flavor'])['cur']['nova_object.data'] + # Do we have a custom resource flavor extra spec? + specs = flavor['extra_specs'] if 'extra_specs' in flavor else {} + for spec_key in specs: + if spec_key.startswith('resources:CUSTOM_'): + # We found a match so this instance is good. + return True + return False + + def _check_ironic_flavor_migration(self): + """In Pike, ironic instances and flavors need to be migrated to use + custom resource classes. In ironic, the node.resource_class should be + set to some custom resource class value which should match a + "resources:" flavor extra spec on baremetal + flavors. Existing ironic instances will have their embedded + instance.flavor.extra_specs migrated to use the matching ironic + node.resource_class value in the nova-compute service, or they can + be forcefully migrated using "nova-manage db ironic_flavor_migration". + + In this check, we look for all ironic compute nodes in all non-cell0 + cells, and from those ironic compute nodes, we look for an instance + that has a "resources:CUSTOM_*" key in it's embedded flavor extra + specs. + """ + cell_mappings = self._get_non_cell0_mappings() + ctxt = nova_context.get_admin_context() + # dict of cell identifier (name or uuid) to number of unmigrated + # instances + unmigrated_instance_count_by_cell = collections.defaultdict(int) + for cell_mapping in cell_mappings: + with nova_context.target_cell(ctxt, cell_mapping) as cctxt: + # Get the (non-deleted) ironic compute nodes in this cell. + meta = MetaData(bind=db_session.get_engine(context=cctxt)) + compute_nodes = Table('compute_nodes', meta, autoload=True) + ironic_nodes = ( + compute_nodes.select().where(and_( + compute_nodes.c.hypervisor_type == 'ironic', + compute_nodes.c.deleted == 0 + )).execute().fetchall()) + + if ironic_nodes: + # We have ironic nodes in this cell, let's iterate over + # them looking for instances. + instances = Table('instances', meta, autoload=True) + extras = Table('instance_extra', meta, autoload=True) + for node in ironic_nodes: + nodename = node['hypervisor_hostname'] + # Get any (non-deleted) instances for this node. + ironic_instances = ( + instances.select().where(and_( + instances.c.node == nodename, + instances.c.deleted == 0 + )).execute().fetchall()) + # Get the instance_extras for each instance so we can + # find the flavors. + for inst in ironic_instances: + if not self._is_ironic_instance_migrated( + extras, inst): + # We didn't find the extra spec key for this + # instance so increment the number of + # unmigrated instances in this cell. + unmigrated_instance_count_by_cell[ + cell_mapping['uuid']] += 1 + + if not cell_mappings: + # There are no non-cell0 mappings so we can't determine this, just + # return a warning. The cellsv2 check would have already failed + # on this. + msg = (_('Unable to determine ironic flavor migration without ' + 'cell mappings.')) + return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg) + + if unmigrated_instance_count_by_cell: + # There are unmigrated ironic instances, so we need to fail. + msg = (_('There are (cell=x) number of unmigrated instances in ' + 'each cell: %s. Run \'nova-manage db ' + 'ironic_flavor_migration\' on each cell.') % + ' '.join('(%s=%s)' % ( + cell_id, unmigrated_instance_count_by_cell[cell_id]) + for cell_id in + sorted(unmigrated_instance_count_by_cell.keys()))) + return UpgradeCheckResult(UpgradeCheckCode.FAILURE, msg) + + # Either there were no ironic compute nodes or all instances for + # those nodes are already migrated, so there is nothing to do. + return UpgradeCheckResult(UpgradeCheckCode.SUCCESS) + # The format of the check functions is to return an UpgradeCheckResult # object with the appropriate UpgradeCheckCode and details set. If the # check hits warnings or failures then those should be stored in the @@ -358,6 +454,8 @@ def _check_resource_providers(self): (_('Placement API'), _check_placement), # Added in Ocata (_('Resource Providers'), _check_resource_providers), + # Added in Rocky (but also useful going back to Pike) + (_('Ironic Flavor Migration'), _check_ironic_flavor_migration) ) def _get_details(self, upgrade_check_result): diff --git a/nova/tests/unit/cmd/test_status.py b/nova/tests/unit/cmd/test_status.py index a8d31728c4c..6712f58bd23 100644 --- a/nova/tests/unit/cmd/test_status.py +++ b/nova/tests/unit/cmd/test_status.py @@ -656,3 +656,159 @@ def test_check_resource_providers_equal_rps_to_computes(self): result = self.cmd._check_resource_providers() self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) self.assertIsNone(result.details) + + +class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase): + """Tests for the nova-status upgrade check on ironic flavor migration.""" + + # We'll setup the database ourselves because we need to use cells fixtures + # for multiple cell mappings. + USES_DB_SELF = True + + # This will create three cell mappings: cell0, cell1 (default) and cell2 + NUMBER_OF_CELLS = 2 + + def setUp(self): + super(TestUpgradeCheckIronicFlavorMigration, self).setUp() + self.output = StringIO() + self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output)) + # We always need the API DB to be setup. + self.useFixture(nova_fixtures.Database(database='api')) + self.cmd = status.UpgradeCommands() + + @staticmethod + def _create_node_in_cell(ctxt, cell, hypervisor_type, nodename): + with context.target_cell(ctxt, cell) as cctxt: + cn = objects.ComputeNode( + context=cctxt, + hypervisor_type=hypervisor_type, + hypervisor_hostname=nodename, + # The rest of these values are fakes. + host=uuids.host, + vcpus=4, + memory_mb=8 * 1024, + local_gb=40, + vcpus_used=2, + memory_mb_used=2 * 1024, + local_gb_used=10, + hypervisor_version=1, + cpu_info='{"arch": "x86_64"}') + cn.create() + return cn + + @staticmethod + def _create_instance_in_cell(ctxt, cell, node, is_deleted=False, + flavor_migrated=False): + with context.target_cell(ctxt, cell) as cctxt: + inst = objects.Instance( + context=cctxt, + host=node.host, + node=node.hypervisor_hostname, + uuid=uuidutils.generate_uuid()) + inst.create() + + if is_deleted: + inst.destroy() + else: + # Create an embedded flavor for the instance. We don't create + # this because we're in a cell context and flavors are global, + # but we don't actually care about global flavors in this + # check. + extra_specs = {} + if flavor_migrated: + extra_specs['resources:CUSTOM_BAREMETAL_GOLD'] = '1' + inst.flavor = objects.Flavor(cctxt, extra_specs=extra_specs) + inst.old_flavor = None + inst.new_flavor = None + inst.save() + + return inst + + def test_fresh_install_no_cell_mappings(self): + """Tests the scenario where we don't have any cell mappings (no cells + v2 setup yet) so we don't know what state we're in and we return a + warning. + """ + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.WARNING, result.code) + self.assertIn('Unable to determine ironic flavor migration without ' + 'cell mappings', result.details) + + def test_fresh_install_no_computes(self): + """Tests a fresh install scenario where we have two non-cell0 cells + but no compute nodes in either cell yet, so there is nothing to do + and we return success. + """ + self._setup_cells() + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) + + def test_mixed_computes_deleted_ironic_instance(self): + """Tests the scenario where we have a kvm compute node in one cell + and an ironic compute node in another cell. The kvm compute node does + not have any instances. The ironic compute node has an instance with + the same hypervisor_hostname match but the instance is (soft) deleted + so it's ignored. + """ + self._setup_cells() + ctxt = context.get_admin_context() + # Create the ironic compute node in cell1 + ironic_node = self._create_node_in_cell( + ctxt, self.cell_mappings['cell1'], 'ironic', uuids.node_uuid) + # Create the kvm compute node in cell2 + self._create_node_in_cell( + ctxt, self.cell_mappings['cell2'], 'kvm', 'fake-kvm-host') + + # Now create an instance in cell1 which is on the ironic node but is + # soft deleted (instance.deleted == instance.id). + self._create_instance_in_cell( + ctxt, self.cell_mappings['cell1'], ironic_node, is_deleted=True) + + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) + + def test_unmigrated_ironic_instances(self): + """Tests a scenario where we have two cells with only ironic compute + nodes. The first cell has one migrated and one unmigrated instance. + The second cell has two unmigrated instances. The result is the check + returns failure. + """ + self._setup_cells() + ctxt = context.get_admin_context() + + # Create the ironic compute nodes in cell1 + for x in range(2): + cell = self.cell_mappings['cell1'] + ironic_node = self._create_node_in_cell( + ctxt, cell, 'ironic', getattr(uuids, 'cell1-node-%d' % x)) + # Create an instance for this node. In cell1, we have one + # migrated and one unmigrated instance. + flavor_migrated = True if x % 2 else False + self._create_instance_in_cell( + ctxt, cell, ironic_node, flavor_migrated=flavor_migrated) + + # Create the ironic compute nodes in cell2 + for x in range(2): + cell = self.cell_mappings['cell2'] + ironic_node = self._create_node_in_cell( + ctxt, cell, 'ironic', getattr(uuids, 'cell2-node-%d' % x)) + # Create an instance for this node. In cell2, all instances are + # unmigrated. + self._create_instance_in_cell( + ctxt, cell, ironic_node, flavor_migrated=False) + + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.FAILURE, result.code) + # Check the message - it should point out cell1 has one unmigrated + # instance and cell2 has two unmigrated instances. + unmigrated_instance_count_by_cell = { + self.cell_mappings['cell1'].uuid: 1, + self.cell_mappings['cell2'].uuid: 2 + } + self.assertIn( + 'There are (cell=x) number of unmigrated instances in each ' + 'cell: %s.' % ' '.join('(%s=%s)' % ( + cell_id, unmigrated_instance_count_by_cell[cell_id]) + for cell_id in + sorted(unmigrated_instance_count_by_cell.keys())), + result.details) diff --git a/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml b/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml new file mode 100644 index 00000000000..c4a5615bd81 --- /dev/null +++ b/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml @@ -0,0 +1,6 @@ +--- +upgrade: + - | + A new check is added to the ``nova-status upgrade check`` CLI which can + assist with determining if ironic instances have had their embedded flavor + migrated to use the corresponding ironic node custom resource class.