From daac9a69500c318b5e9d94be0031a1c9506d0340 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Tue, 12 Dec 2017 18:39:17 -0500 Subject: [PATCH] Add nova-status check for ironic flavor migration In Pike we started requiring that ironic instances have their embedded flavor migrated to track the ironic node custom resource class. This can be done either via the normal running of the nova-compute service and ironic driver or via the 'nova-manage db ironic_flavor_migration' command. This change adds a nova-status check to see if there are any unmigrated ironic instances across all non-cell0 cells, and is based mostly on the same logic within the nova-manage command except it's multi-cell aware and doesn't use the objects. Conflicts: doc/source/cli/nova-status.rst NOTE(mriedem): The conflict is because the Rocky section in the man page does not exist in Queens. The note about the new check is added to the Queens section and mentions it was backported from Rocky. Change-Id: Ifd22325e849db2353b1b1eedfe998e3d6a79591c (cherry picked from commit 7eb670352126ed8d30426eb422fe5aed9cb21e7f) --- doc/source/cli/nova-status.rst | 2 + nova/cmd/status.py | 100 ++++++++++- nova/tests/unit/cmd/test_status.py | 156 ++++++++++++++++++ ...nic-flavor-migration-4c78314bf4e74ff6.yaml | 6 + 4 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml diff --git a/doc/source/cli/nova-status.rst b/doc/source/cli/nova-status.rst index cd145f6ab8d..88cfbf66e40 100644 --- a/doc/source/cli/nova-status.rst +++ b/doc/source/cli/nova-status.rst @@ -104,6 +104,8 @@ Upgrade **17.0.0 (Queens)** * Checks for the Placement API are modified to require version 1.17. + * Checks that ironic instances have had their embedded flavors migrated to + use custom resource classes. This check was backported from Rocky. See Also ======== diff --git a/nova/cmd/status.py b/nova/cmd/status.py index a234662d3fb..d4c5cdddf6a 100644 --- a/nova/cmd/status.py +++ b/nova/cmd/status.py @@ -18,6 +18,7 @@ from __future__ import print_function +import collections # enum comes from the enum34 package if python < 3.4, else it's stdlib import enum import functools @@ -27,10 +28,11 @@ from keystoneauth1 import exceptions as ks_exc from oslo_config import cfg +from oslo_serialization import jsonutils import pkg_resources import prettytable from sqlalchemy import func as sqlfunc -from sqlalchemy import MetaData, Table, select +from sqlalchemy import MetaData, Table, and_, select from nova.cmd import common as cmd_common import nova.conf @@ -344,6 +346,100 @@ def _check_resource_providers(self): # We have RPs >= CNs which is what we want to see. return UpgradeCheckResult(UpgradeCheckCode.SUCCESS) + @staticmethod + def _is_ironic_instance_migrated(extras, inst): + extra = (extras.select().where(extras.c.instance_uuid == inst['uuid'] + ).execute().first()) + # Pull the flavor and deserialize it. Note that the flavor info for an + # instance is a dict keyed by "cur", "old", "new" and we want the + # current flavor. + flavor = jsonutils.loads(extra['flavor'])['cur']['nova_object.data'] + # Do we have a custom resource flavor extra spec? + specs = flavor['extra_specs'] if 'extra_specs' in flavor else {} + for spec_key in specs: + if spec_key.startswith('resources:CUSTOM_'): + # We found a match so this instance is good. + return True + return False + + def _check_ironic_flavor_migration(self): + """In Pike, ironic instances and flavors need to be migrated to use + custom resource classes. In ironic, the node.resource_class should be + set to some custom resource class value which should match a + "resources:" flavor extra spec on baremetal + flavors. Existing ironic instances will have their embedded + instance.flavor.extra_specs migrated to use the matching ironic + node.resource_class value in the nova-compute service, or they can + be forcefully migrated using "nova-manage db ironic_flavor_migration". + + In this check, we look for all ironic compute nodes in all non-cell0 + cells, and from those ironic compute nodes, we look for an instance + that has a "resources:CUSTOM_*" key in it's embedded flavor extra + specs. + """ + cell_mappings = self._get_non_cell0_mappings() + ctxt = nova_context.get_admin_context() + # dict of cell identifier (name or uuid) to number of unmigrated + # instances + unmigrated_instance_count_by_cell = collections.defaultdict(int) + for cell_mapping in cell_mappings: + with nova_context.target_cell(ctxt, cell_mapping) as cctxt: + # Get the (non-deleted) ironic compute nodes in this cell. + meta = MetaData(bind=db_session.get_engine(context=cctxt)) + compute_nodes = Table('compute_nodes', meta, autoload=True) + ironic_nodes = ( + compute_nodes.select().where(and_( + compute_nodes.c.hypervisor_type == 'ironic', + compute_nodes.c.deleted == 0 + )).execute().fetchall()) + + if ironic_nodes: + # We have ironic nodes in this cell, let's iterate over + # them looking for instances. + instances = Table('instances', meta, autoload=True) + extras = Table('instance_extra', meta, autoload=True) + for node in ironic_nodes: + nodename = node['hypervisor_hostname'] + # Get any (non-deleted) instances for this node. + ironic_instances = ( + instances.select().where(and_( + instances.c.node == nodename, + instances.c.deleted == 0 + )).execute().fetchall()) + # Get the instance_extras for each instance so we can + # find the flavors. + for inst in ironic_instances: + if not self._is_ironic_instance_migrated( + extras, inst): + # We didn't find the extra spec key for this + # instance so increment the number of + # unmigrated instances in this cell. + unmigrated_instance_count_by_cell[ + cell_mapping['uuid']] += 1 + + if not cell_mappings: + # There are no non-cell0 mappings so we can't determine this, just + # return a warning. The cellsv2 check would have already failed + # on this. + msg = (_('Unable to determine ironic flavor migration without ' + 'cell mappings.')) + return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg) + + if unmigrated_instance_count_by_cell: + # There are unmigrated ironic instances, so we need to fail. + msg = (_('There are (cell=x) number of unmigrated instances in ' + 'each cell: %s. Run \'nova-manage db ' + 'ironic_flavor_migration\' on each cell.') % + ' '.join('(%s=%s)' % ( + cell_id, unmigrated_instance_count_by_cell[cell_id]) + for cell_id in + sorted(unmigrated_instance_count_by_cell.keys()))) + return UpgradeCheckResult(UpgradeCheckCode.FAILURE, msg) + + # Either there were no ironic compute nodes or all instances for + # those nodes are already migrated, so there is nothing to do. + return UpgradeCheckResult(UpgradeCheckCode.SUCCESS) + # The format of the check functions is to return an UpgradeCheckResult # object with the appropriate UpgradeCheckCode and details set. If the # check hits warnings or failures then those should be stored in the @@ -358,6 +454,8 @@ def _check_resource_providers(self): (_('Placement API'), _check_placement), # Added in Ocata (_('Resource Providers'), _check_resource_providers), + # Added in Rocky (but also useful going back to Pike) + (_('Ironic Flavor Migration'), _check_ironic_flavor_migration) ) def _get_details(self, upgrade_check_result): diff --git a/nova/tests/unit/cmd/test_status.py b/nova/tests/unit/cmd/test_status.py index a8d31728c4c..6712f58bd23 100644 --- a/nova/tests/unit/cmd/test_status.py +++ b/nova/tests/unit/cmd/test_status.py @@ -656,3 +656,159 @@ def test_check_resource_providers_equal_rps_to_computes(self): result = self.cmd._check_resource_providers() self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) self.assertIsNone(result.details) + + +class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase): + """Tests for the nova-status upgrade check on ironic flavor migration.""" + + # We'll setup the database ourselves because we need to use cells fixtures + # for multiple cell mappings. + USES_DB_SELF = True + + # This will create three cell mappings: cell0, cell1 (default) and cell2 + NUMBER_OF_CELLS = 2 + + def setUp(self): + super(TestUpgradeCheckIronicFlavorMigration, self).setUp() + self.output = StringIO() + self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output)) + # We always need the API DB to be setup. + self.useFixture(nova_fixtures.Database(database='api')) + self.cmd = status.UpgradeCommands() + + @staticmethod + def _create_node_in_cell(ctxt, cell, hypervisor_type, nodename): + with context.target_cell(ctxt, cell) as cctxt: + cn = objects.ComputeNode( + context=cctxt, + hypervisor_type=hypervisor_type, + hypervisor_hostname=nodename, + # The rest of these values are fakes. + host=uuids.host, + vcpus=4, + memory_mb=8 * 1024, + local_gb=40, + vcpus_used=2, + memory_mb_used=2 * 1024, + local_gb_used=10, + hypervisor_version=1, + cpu_info='{"arch": "x86_64"}') + cn.create() + return cn + + @staticmethod + def _create_instance_in_cell(ctxt, cell, node, is_deleted=False, + flavor_migrated=False): + with context.target_cell(ctxt, cell) as cctxt: + inst = objects.Instance( + context=cctxt, + host=node.host, + node=node.hypervisor_hostname, + uuid=uuidutils.generate_uuid()) + inst.create() + + if is_deleted: + inst.destroy() + else: + # Create an embedded flavor for the instance. We don't create + # this because we're in a cell context and flavors are global, + # but we don't actually care about global flavors in this + # check. + extra_specs = {} + if flavor_migrated: + extra_specs['resources:CUSTOM_BAREMETAL_GOLD'] = '1' + inst.flavor = objects.Flavor(cctxt, extra_specs=extra_specs) + inst.old_flavor = None + inst.new_flavor = None + inst.save() + + return inst + + def test_fresh_install_no_cell_mappings(self): + """Tests the scenario where we don't have any cell mappings (no cells + v2 setup yet) so we don't know what state we're in and we return a + warning. + """ + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.WARNING, result.code) + self.assertIn('Unable to determine ironic flavor migration without ' + 'cell mappings', result.details) + + def test_fresh_install_no_computes(self): + """Tests a fresh install scenario where we have two non-cell0 cells + but no compute nodes in either cell yet, so there is nothing to do + and we return success. + """ + self._setup_cells() + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) + + def test_mixed_computes_deleted_ironic_instance(self): + """Tests the scenario where we have a kvm compute node in one cell + and an ironic compute node in another cell. The kvm compute node does + not have any instances. The ironic compute node has an instance with + the same hypervisor_hostname match but the instance is (soft) deleted + so it's ignored. + """ + self._setup_cells() + ctxt = context.get_admin_context() + # Create the ironic compute node in cell1 + ironic_node = self._create_node_in_cell( + ctxt, self.cell_mappings['cell1'], 'ironic', uuids.node_uuid) + # Create the kvm compute node in cell2 + self._create_node_in_cell( + ctxt, self.cell_mappings['cell2'], 'kvm', 'fake-kvm-host') + + # Now create an instance in cell1 which is on the ironic node but is + # soft deleted (instance.deleted == instance.id). + self._create_instance_in_cell( + ctxt, self.cell_mappings['cell1'], ironic_node, is_deleted=True) + + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code) + + def test_unmigrated_ironic_instances(self): + """Tests a scenario where we have two cells with only ironic compute + nodes. The first cell has one migrated and one unmigrated instance. + The second cell has two unmigrated instances. The result is the check + returns failure. + """ + self._setup_cells() + ctxt = context.get_admin_context() + + # Create the ironic compute nodes in cell1 + for x in range(2): + cell = self.cell_mappings['cell1'] + ironic_node = self._create_node_in_cell( + ctxt, cell, 'ironic', getattr(uuids, 'cell1-node-%d' % x)) + # Create an instance for this node. In cell1, we have one + # migrated and one unmigrated instance. + flavor_migrated = True if x % 2 else False + self._create_instance_in_cell( + ctxt, cell, ironic_node, flavor_migrated=flavor_migrated) + + # Create the ironic compute nodes in cell2 + for x in range(2): + cell = self.cell_mappings['cell2'] + ironic_node = self._create_node_in_cell( + ctxt, cell, 'ironic', getattr(uuids, 'cell2-node-%d' % x)) + # Create an instance for this node. In cell2, all instances are + # unmigrated. + self._create_instance_in_cell( + ctxt, cell, ironic_node, flavor_migrated=False) + + result = self.cmd._check_ironic_flavor_migration() + self.assertEqual(status.UpgradeCheckCode.FAILURE, result.code) + # Check the message - it should point out cell1 has one unmigrated + # instance and cell2 has two unmigrated instances. + unmigrated_instance_count_by_cell = { + self.cell_mappings['cell1'].uuid: 1, + self.cell_mappings['cell2'].uuid: 2 + } + self.assertIn( + 'There are (cell=x) number of unmigrated instances in each ' + 'cell: %s.' % ' '.join('(%s=%s)' % ( + cell_id, unmigrated_instance_count_by_cell[cell_id]) + for cell_id in + sorted(unmigrated_instance_count_by_cell.keys())), + result.details) diff --git a/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml b/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml new file mode 100644 index 00000000000..c4a5615bd81 --- /dev/null +++ b/releasenotes/notes/nova-status-check-ironic-flavor-migration-4c78314bf4e74ff6.yaml @@ -0,0 +1,6 @@ +--- +upgrade: + - | + A new check is added to the ``nova-status upgrade check`` CLI which can + assist with determining if ironic instances have had their embedded flavor + migrated to use the corresponding ironic node custom resource class.