Skip to content

Commit

Permalink
Avoid requesting DISK_GB allocation for root_gb on BFV instances
Browse files Browse the repository at this point in the history
Right now, we still ask placement for a disk allocation covering
swap, ephemeral, and root disks for the instance even if the instance
is going to be volume-backed. This patch makes us not include the
root size in that calculation for placement, avoiding a false
failure because the volume size is counted against the compute's
available space.

To do this, we need another flag in request_spec to track the
BFV-ness of the instance. For now, this patch only sets that flag on
new builds, and the scheduler client treats the absence of the flag
as "unknown, so assume not-BFV" for compatibility. A
subsequent patch can calculate that flag for existing instances
so that we will be able to heal over time by migrating instances
or re-writing their allocations to reflect reality.

Partial-Bug: #1469179
Change-Id: I9c2111f7377df65c1fc3c72323f85483b3295989
  • Loading branch information
kk7ds authored and Eric Fried committed Jul 18, 2018
1 parent eb4f65a commit 03c596a
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 9 deletions.
9 changes: 9 additions & 0 deletions nova/compute/api.py
Expand Up @@ -885,6 +885,15 @@ def _provision_instances(self, context, instance_type, min_count,
base_options['pci_requests'], filter_properties,
instance_group, base_options['availability_zone'],
security_groups=security_groups)

if block_device_mapping:
# Record whether or not we are a BFV instance
root = block_device_mapping.root_bdm()
req_spec.is_bfv = bool(root and root.is_volume)
else:
# If we have no BDMs, we're clearly not BFV
req_spec.is_bfv = False

# NOTE(danms): We need to record num_instances on the request
# spec as this is how the conductor knows how many were in this
# batch.
Expand Down
8 changes: 6 additions & 2 deletions nova/objects/request_spec.py
Expand Up @@ -44,7 +44,8 @@ class RequestSpec(base.NovaObject):
# Version 1.8: Added security_groups
# Version 1.9: Added user_id
# Version 1.10: Added network_metadata
VERSION = '1.10'
# Version 1.11: Added is_bfv
VERSION = '1.11'

fields = {
'id': fields.IntegerField(),
Expand Down Expand Up @@ -82,11 +83,14 @@ class RequestSpec(base.NovaObject):
'instance_uuid': fields.UUIDField(),
'security_groups': fields.ObjectField('SecurityGroupList'),
'network_metadata': fields.ObjectField('NetworkMetadata'),
'is_bfv': fields.BooleanField(),
}

def obj_make_compatible(self, primitive, target_version):
super(RequestSpec, self).obj_make_compatible(primitive, target_version)
target_version = versionutils.convert_version_to_tuple(target_version)
if target_version < (1, 11) and 'is_bfv' in primitive:
del primitive['is_bfv']
if target_version < (1, 10):
if 'network_metadata' in primitive:
del primitive['network_metadata']
Expand Down Expand Up @@ -473,7 +477,7 @@ def _from_db_object(context, spec, db_spec):
# though they should match.
if key in ['id', 'instance_uuid']:
setattr(spec, key, db_spec[key])
else:
elif key in spec_obj:
setattr(spec, key, getattr(spec_obj, key))
spec._context = context

Expand Down
9 changes: 7 additions & 2 deletions nova/scheduler/utils.py
Expand Up @@ -397,9 +397,14 @@ def resources_from_request_spec(spec_obj):
fields.ResourceClass.MEMORY_MB: spec_obj.memory_mb,
}

requested_disk_mb = (1024 * (spec_obj.root_gb +
spec_obj.ephemeral_gb) +
requested_disk_mb = ((1024 * spec_obj.ephemeral_gb) +
spec_obj.swap)

if 'is_bfv' not in spec_obj or not spec_obj.is_bfv:
# Either this is not a BFV instance, or we are not sure,
# so ask for root_gb allocation
requested_disk_mb += (1024 * spec_obj.root_gb)

# NOTE(sbauza): Disk request is expressed in MB but we count
# resources in GB. Since there could be a remainder of the division
# by 1024, we need to ceil the result to the next bigger Gb so we
Expand Down
79 changes: 79 additions & 0 deletions nova/tests/functional/test_servers.py
Expand Up @@ -3578,6 +3578,85 @@ def test_soft_delete_then_restore(self):
self._delete_and_check_allocations(server)


class VolumeBackedServerTest(integrated_helpers.ProviderUsageBaseTestCase):
    """Tests for volume-backed servers.

    Verifies that the DISK_GB allocation recorded in placement includes
    the flavor's root_gb only for image-backed servers: a volume-backed
    server's root disk is provided by the volume service, so it must not
    be counted against the compute node.
    """

    compute_driver = 'fake.SmallFakeDriver'

    def setUp(self):
        super(VolumeBackedServerTest, self).setUp()
        self.compute1 = self._start_compute('host1')
        self.compute2 = self._start_compute('host2')
        self.flavor_id = self._create_flavor()

    def _create_flavor(self):
        """Create a flavor with root, ephemeral and swap disk; return its id."""
        body = {
            'flavor': {
                'id': 'vbst',
                'name': 'special',
                'ram': 512,
                'vcpus': 1,
                'disk': 10,
                'OS-FLV-EXT-DATA:ephemeral': 20,
                # NOTE: swap is expressed in MB, unlike the disk sizes.
                'swap': 5 * 1024,
                'rxtx_factor': 1.0,
                'os-flavor-access:is_public': True,
            },
        }
        self.admin_api.post_flavor(body)
        return body['flavor']['id']

    def _create_server(self):
        """Boot an image-backed (not volume-backed) server; wait for ACTIVE."""
        # The image proxy API is only available before microversion 2.36,
        # so temporarily pin the client to look up an image id.
        with nova.utils.temporary_mutation(self.api, microversion='2.35'):
            image_id = self.api.get_images()[0]['id']
        # NOTE: the server name was previously 'trait-based-server', a
        # copy-paste from TraitsBasedSchedulingTest; use a name that
        # reflects what this helper actually creates.
        server_req = self._build_minimal_create_server_request(
            self.api, 'disk-backed-server',
            image_uuid=image_id,
            flavor_id=self.flavor_id, networks='none')
        server = self.api.post_server({'server': server_req})
        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
        return server

    def _create_volume_backed_server(self):
        """Boot a server from a pre-existing volume; wait for ACTIVE."""
        self.useFixture(nova_fixtures.CinderFixtureNewAttachFlow(self))
        volume_id = nova_fixtures.CinderFixtureNewAttachFlow.IMAGE_BACKED_VOL
        server_req_body = {
            # There is no imageRef because this is boot from volume.
            'server': {
                'flavorRef': self.flavor_id,
                'name': 'test_volume_backed',
                # We don't care about networking for this test. This
                # requires microversion >= 2.37.
                'networks': 'none',
                'block_device_mapping_v2': [{
                    'boot_index': 0,
                    'uuid': volume_id,
                    'source_type': 'volume',
                    'destination_type': 'volume'
                }]
            }
        }
        server = self.api.post_server(server_req_body)
        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
        return server

    def test_ephemeral_has_disk_allocation(self):
        """Image-backed: allocation covers root + ephemeral + swap."""
        server = self._create_server()
        allocs = self._get_allocations_by_server_uuid(server['id'])
        resources = list(allocs.values())[0]['resources']
        self.assertIn('MEMORY_MB', resources)
        # 10gb root, 20gb ephemeral, 5gb swap
        self.assertEqual(35, resources['DISK_GB'])

    def test_volume_backed_no_disk_allocation(self):
        """Volume-backed: allocation excludes the flavor's root_gb."""
        server = self._create_volume_backed_server()
        allocs = self._get_allocations_by_server_uuid(server['id'])
        resources = list(allocs.values())[0]['resources']
        self.assertIn('MEMORY_MB', resources)
        # 0gb root, 20gb ephemeral, 5gb swap
        self.assertEqual(25, resources['DISK_GB'])


class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
"""Tests for requesting a server with required traits in Placement"""

Expand Down
5 changes: 3 additions & 2 deletions nova/tests/unit/compute/test_compute_api.py
Expand Up @@ -4461,7 +4461,8 @@ def do_test(
'numa_topology': None,
'pci_requests': None}
security_groups = {}
block_device_mapping = [objects.BlockDeviceMapping(
block_device_mapping = objects.BlockDeviceMappingList(
objects=[objects.BlockDeviceMapping(
**fake_block_device.FakeDbBlockDeviceDict(
{
'id': 1,
Expand All @@ -4470,7 +4471,7 @@ def do_test(
'destination_type': 'volume',
'device_name': 'vda',
'boot_index': 0,
}))]
}))])
shutdown_terminate = True
instance_group = None
check_server_group_quota = False
Expand Down
2 changes: 1 addition & 1 deletion nova/tests/unit/objects/test_objects.py
Expand Up @@ -1142,7 +1142,7 @@ def obj_name(cls):
'PowerVMLiveMigrateData': '1.3-79c635ecf61d1d70b5b9fa04bf778a91',
'Quotas': '1.3-40fcefe522111dddd3e5e6155702cf4e',
'QuotasNoOp': '1.3-347a039fc7cfee7b225b68b5181e0733',
'RequestSpec': '1.10-afe714bc445ab7cb791150a775f3b779',
'RequestSpec': '1.11-851a690dbf116fb5165cc94ea6c85629',
'S3ImageMapping': '1.0-7dd7366a890d82660ed121de9092276e',
'SchedulerLimits': '1.0-249c4bd8e62a9b327b7026b7f19cc641',
'SchedulerRetries': '1.1-3c9c8b16143ebbb6ad7030e999d14cc0',
Expand Down
4 changes: 2 additions & 2 deletions nova/tests/unit/objects/test_request_spec.py
Expand Up @@ -299,7 +299,7 @@ def test_from_primitives(self, mock_limits):
spec = objects.RequestSpec.from_primitives(ctxt, spec_dict, filt_props)
mock_limits.assert_called_once_with({})
# Make sure that all fields are set using that helper method
skip = ['id', 'security_groups', 'network_metadata']
skip = ['id', 'security_groups', 'network_metadata', 'is_bfv']
for field in [f for f in spec.obj_fields if f not in skip]:
self.assertTrue(spec.obj_attr_is_set(field),
'Field: %s is not set' % field)
Expand Down Expand Up @@ -329,7 +329,7 @@ def test_from_components(self):
filter_properties, instance_group, instance.availability_zone,
objects.SecurityGroupList())
# Make sure that all fields are set using that helper method
skip = ['id', 'network_metadata']
skip = ['id', 'network_metadata', 'is_bfv']
for field in [f for f in spec.obj_fields if f not in skip]:
self.assertTrue(spec.obj_attr_is_set(field),
'Field: %s is not set' % field)
Expand Down
@@ -0,0 +1,13 @@
---
fixes:
- |
Booting volume-backed instances no longer includes an incorrect allocation
against the compute node for the root disk. Historically, this has been
quite broken behavior in Nova, where volume-backed instances would count
against available space on the compute node, even though their storage
was provided by the volume service. Now, newly-booted volume-backed
instances will not create allocations of ``DISK_GB`` against the compute
node for the ``root_gb`` quantity in the flavor. Note that if you are
still using a scheduler configured with the (now deprecated)
``DiskFilter`` (including deployments using the ``CachingScheduler``),
the above change will not apply to you.

0 comments on commit 03c596a

Please sign in to comment.