From 03c596a9f4324e572bc04d4bbad09a6d3d47366c Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Fri, 6 Jul 2018 09:09:05 -0700 Subject: [PATCH] Avoid requesting DISK_GB allocation for root_gb on BFV instances Right now, we still ask placement for a disk allocation covering swap, ephemeral, and root disks for the instance even if the instance is going to be volume-backed. This patch makes us not include the root size in that calculation for placement, avoiding a false failure because the volume size is counted against the compute's available space. To do this, we need another flag in request_spec to track the BFV-ness of the instance. Right now, this patch just sets that on new builds and the scheduler client assumes a lack of said flag as "I don't know, so assume not-BFV" for compatibility. A subsequent patch can calculate that flag for existing instances so that we will be able to heal over time by migrating instances or re-writing their allocations to reflect reality. Partial-Bug: #1469179 Change-Id: I9c2111f7377df65c1fc3c72323f85483b3295989 --- nova/compute/api.py | 9 +++ nova/objects/request_spec.py | 8 +- nova/scheduler/utils.py | 9 ++- nova/tests/functional/test_servers.py | 79 +++++++++++++++++++ nova/tests/unit/compute/test_compute_api.py | 5 +- nova/tests/unit/objects/test_objects.py | 2 +- nova/tests/unit/objects/test_request_spec.py | 4 +- ...llocate-from-compute-95d048fbe9867c34.yaml | 13 +++ 8 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/bfv-instances-no-longer-allocate-from-compute-95d048fbe9867c34.yaml diff --git a/nova/compute/api.py b/nova/compute/api.py index a14e1606267..b33cbd03aff 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -885,6 +885,15 @@ def _provision_instances(self, context, instance_type, min_count, base_options['pci_requests'], filter_properties, instance_group, base_options['availability_zone'], security_groups=security_groups) + + if block_device_mapping: + # Record whether or not we are a BFV instance + root = block_device_mapping.root_bdm() + req_spec.is_bfv = bool(root and root.is_volume) + else: + # If we have no BDMs, we're clearly not BFV + req_spec.is_bfv = False + # NOTE(danms): We need to record num_instances on the request # spec as this is how the conductor knows how many were in this # batch. diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py index a211edfd41d..7ba4a499804 100644 --- a/nova/objects/request_spec.py +++ b/nova/objects/request_spec.py @@ -44,7 +44,8 @@ class RequestSpec(base.NovaObject): # Version 1.8: Added security_groups # Version 1.9: Added user_id # Version 1.10: Added network_metadata - VERSION = '1.10' + # Version 1.11: Added is_bfv + VERSION = '1.11' fields = { 'id': fields.IntegerField(), @@ -82,11 +83,14 @@ class RequestSpec(base.NovaObject): 'instance_uuid': fields.UUIDField(), 'security_groups': fields.ObjectField('SecurityGroupList'), 'network_metadata': fields.ObjectField('NetworkMetadata'), + 'is_bfv': fields.BooleanField(), } def obj_make_compatible(self, primitive, target_version): super(RequestSpec, self).obj_make_compatible(primitive, target_version) target_version = versionutils.convert_version_to_tuple(target_version) + if target_version < (1, 11) and 'is_bfv' in primitive: + del primitive['is_bfv'] if target_version < (1, 10): if 'network_metadata' in primitive: del primitive['network_metadata'] @@ -473,7 +477,7 @@ def _from_db_object(context, spec, db_spec): # though they should match. if key in ['id', 'instance_uuid']: setattr(spec, key, db_spec[key]) - else: + elif key in spec_obj: setattr(spec, key, getattr(spec_obj, key)) spec._context = context diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index 5f35242daca..8e9b402da70 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -397,9 +397,14 @@ def resources_from_request_spec(spec_obj): fields.ResourceClass.MEMORY_MB: spec_obj.memory_mb, } - requested_disk_mb = (1024 * (spec_obj.root_gb + - spec_obj.ephemeral_gb) + + requested_disk_mb = ((1024 * spec_obj.ephemeral_gb) + spec_obj.swap) + + if 'is_bfv' not in spec_obj or not spec_obj.is_bfv: + # Either this is not a BFV instance, or we are not sure, + # so ask for root_gb allocation + requested_disk_mb += (1024 * spec_obj.root_gb) + # NOTE(sbauza): Disk request is expressed in MB but we count # resources in GB. Since there could be a remainder of the division # by 1024, we need to ceil the result to the next bigger Gb so we diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 4ec97875582..f1cf685aa3f 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -3578,6 +3578,85 @@ def test_soft_delete_then_restore(self): self._delete_and_check_allocations(server) +class VolumeBackedServerTest(integrated_helpers.ProviderUsageBaseTestCase): + """Tests for volume-backed servers.""" + + compute_driver = 'fake.SmallFakeDriver' + + def setUp(self): + super(VolumeBackedServerTest, self).setUp() + self.compute1 = self._start_compute('host1') + self.compute2 = self._start_compute('host2') + self.flavor_id = self._create_flavor() + + def _create_flavor(self): + body = { + 'flavor': { + 'id': 'vbst', + 'name': 'special', + 'ram': 512, + 'vcpus': 1, + 'disk': 10, + 'OS-FLV-EXT-DATA:ephemeral': 20, + 'swap': 5 * 1024, + 'rxtx_factor': 1.0, + 'os-flavor-access:is_public': True, + }, + } + self.admin_api.post_flavor(body) + return body['flavor']['id'] + + def _create_server(self): + with nova.utils.temporary_mutation(self.api, microversion='2.35'): + image_id = self.api.get_images()[0]['id'] + server_req = self._build_minimal_create_server_request( + self.api, 'trait-based-server', + image_uuid=image_id, + flavor_id=self.flavor_id, networks='none') + server = self.api.post_server({'server': server_req}) + server = self._wait_for_state_change(self.api, server, 'ACTIVE') + return server + + def _create_volume_backed_server(self): + self.useFixture(nova_fixtures.CinderFixtureNewAttachFlow(self)) + volume_id = nova_fixtures.CinderFixtureNewAttachFlow.IMAGE_BACKED_VOL + server_req_body = { + # There is no imageRef because this is boot from volume. + 'server': { + 'flavorRef': self.flavor_id, + 'name': 'test_volume_backed', + # We don't care about networking for this test. This + # requires microversion >= 2.37. + 'networks': 'none', + 'block_device_mapping_v2': [{ + 'boot_index': 0, + 'uuid': volume_id, + 'source_type': 'volume', + 'destination_type': 'volume' + }] + } + } + server = self.api.post_server(server_req_body) + server = self._wait_for_state_change(self.api, server, 'ACTIVE') + return server + + def test_ephemeral_has_disk_allocation(self): + server = self._create_server() + allocs = self._get_allocations_by_server_uuid(server['id']) + resources = list(allocs.values())[0]['resources'] + self.assertIn('MEMORY_MB', resources) + # 10gb root, 20gb ephemeral, 5gb swap + self.assertEqual(35, resources['DISK_GB']) + + def test_volume_backed_no_disk_allocation(self): + server = self._create_volume_backed_server() + allocs = self._get_allocations_by_server_uuid(server['id']) + resources = list(allocs.values())[0]['resources'] + self.assertIn('MEMORY_MB', resources) + # 0gb root, 20gb ephemeral, 5gb swap + self.assertEqual(25, resources['DISK_GB']) + + class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase): """Tests for requesting a server with required traits in Placement""" diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 3848ed838c7..d879c36c2ee 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -4461,7 +4461,8 @@ def do_test( 'numa_topology': None, 'pci_requests': None} security_groups = {} - block_device_mapping = [objects.BlockDeviceMapping( + block_device_mapping = objects.BlockDeviceMappingList( + objects=[objects.BlockDeviceMapping( **fake_block_device.FakeDbBlockDeviceDict( { 'id': 1, @@ -4470,7 +4471,7 @@ def do_test( 'destination_type': 'volume', 'device_name': 'vda', 'boot_index': 0, - }))] + }))]) shutdown_terminate = True instance_group = None check_server_group_quota = False diff --git a/nova/tests/unit/objects/test_objects.py b/nova/tests/unit/objects/test_objects.py index 433cfa8b261..b88874870f5 100644 --- a/nova/tests/unit/objects/test_objects.py +++ b/nova/tests/unit/objects/test_objects.py @@ -1142,7 +1142,7 @@ def obj_name(cls): 'PowerVMLiveMigrateData': '1.3-79c635ecf61d1d70b5b9fa04bf778a91', 'Quotas': '1.3-40fcefe522111dddd3e5e6155702cf4e', 'QuotasNoOp': '1.3-347a039fc7cfee7b225b68b5181e0733', - 'RequestSpec': '1.10-afe714bc445ab7cb791150a775f3b779', + 'RequestSpec': '1.11-851a690dbf116fb5165cc94ea6c85629', 'S3ImageMapping': '1.0-7dd7366a890d82660ed121de9092276e', 'SchedulerLimits': '1.0-249c4bd8e62a9b327b7026b7f19cc641', 'SchedulerRetries': '1.1-3c9c8b16143ebbb6ad7030e999d14cc0', diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py index 770f6b9d86a..4577d25409f 100644 --- a/nova/tests/unit/objects/test_request_spec.py +++ b/nova/tests/unit/objects/test_request_spec.py @@ -299,7 +299,7 @@ def test_from_primitives(self, mock_limits): spec = objects.RequestSpec.from_primitives(ctxt, spec_dict, filt_props) mock_limits.assert_called_once_with({}) # Make sure that all fields are set using that helper method - skip = ['id', 'security_groups', 'network_metadata'] + skip = ['id', 'security_groups', 'network_metadata', 'is_bfv'] for field in [f for f in spec.obj_fields if f not in skip]: self.assertTrue(spec.obj_attr_is_set(field), 'Field: %s is not set' % field) @@ -329,7 +329,7 @@ def test_from_components(self): filter_properties, instance_group, instance.availability_zone, objects.SecurityGroupList()) # Make sure that all fields are set using that helper method - skip = ['id', 'network_metadata'] + skip = ['id', 'network_metadata', 'is_bfv'] for field in [f for f in spec.obj_fields if f not in skip]: self.assertTrue(spec.obj_attr_is_set(field), 'Field: %s is not set' % field) diff --git a/releasenotes/notes/bfv-instances-no-longer-allocate-from-compute-95d048fbe9867c34.yaml b/releasenotes/notes/bfv-instances-no-longer-allocate-from-compute-95d048fbe9867c34.yaml new file mode 100644 index 00000000000..c30679e53dd --- /dev/null +++ b/releasenotes/notes/bfv-instances-no-longer-allocate-from-compute-95d048fbe9867c34.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Booting volume-backed instances no longer includes an incorrect allocation + against the compute node for the root disk. Historically, this has been + quite broken behavior in Nova, where volume-backed instances would count + against available space on the compute node, even though their storage + was provided by the volume service. Now, newly-booted volume-backed + instances will not create allocations of ``DISK_GB`` against the compute + node for the ``root_gb`` quantity in the flavor. Note that if you are + still using a scheduler configured with the (now deprecated) + DiskFilter (including deployments using CachingScheduler), the + above change will not apply to you. \ No newline at end of file