Enforce quota limitations for instance resize.
Fixes LP 1030010.

Close off a back-door allowing users to go over-quota on ram
and/or cores by creating a tiny instance and then resizing
to any size regardless of the remaining usage headroom.

The quota management logic is distributed in the sense that the
reservations are passed along with RPC casts that may fail silently,
so rollback or commit is handled by the actor that finalizes the
resize, confirmation, or reversion.

Up-sizing requires sufficient quota headroom upfront to accommodate
the larger size, whereas down-sizing only reduces usage once the
resize is confirmed.

Change-Id: I2b1cbb098c79e37d7ad19221fe23657eb018eae6
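
As a rough, non-authoritative sketch of the flow described above (not part of the commit; the flavor dicts, the rpc_cast callable, and the function names are hypothetical, with QUOTAS standing in for nova.quota.QUOTAS), an up-size reserves the increase before the cast, and only the actor that finishes the operation commits or rolls back:

def api_side_upsize(context, QUOTAS, old_flavor, new_flavor, rpc_cast):
    """Reserve headroom for the increase, then hand the reservation off."""
    deltas = {}
    if new_flavor['vcpus'] > old_flavor['vcpus']:
        deltas['cores'] = new_flavor['vcpus'] - old_flavor['vcpus']
    if new_flavor['memory_mb'] > old_flavor['memory_mb']:
        deltas['ram'] = new_flavor['memory_mb'] - old_flavor['memory_mb']
    reservations = QUOTAS.reserve(context, **deltas) if deltas else None
    # The cast may fail silently, so nothing is committed here; the
    # reservation travels with the message to the finalizing actor.
    rpc_cast(context, reservations=reservations)

def compute_side_finalize(context, QUOTAS, reservations, finish_step):
    """Whoever finishes the resize commits on success, rolls back on error."""
    try:
        finish_step()
    except Exception:
        if reservations:
            QUOTAS.rollback(context, reservations)
        raise
    if reservations:
        QUOTAS.commit(context, reservations)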
Eoghan Glynn committed Aug 14, 2012
1 parent eac3d00 commit f94ae65
Showing 12 changed files with 285 additions and 74 deletions.
126 changes: 122 additions & 4 deletions nova/compute/api.py
@@ -923,10 +923,23 @@ def _delete(self, context, instance):
context, instance['uuid'], 'finished')
if migration_ref:
src_host = migration_ref['source_compute']
# Call since this can race with the terminate_instance
# Call since this can race with the terminate_instance.
# The resize is done but awaiting confirmation/reversion,
# so there are two cases:
# 1. up-resize: here -instance['vcpus'/'memory_mb'] match
# the quota usages accounted for this instance,
# so no further quota adjustment is needed
# 2. down-resize: here -instance['vcpus'/'memory_mb'] are
# shy by delta(old, new) from the quota usages accounted
# for this instance, so we must adjust
deltas = self._downsize_quota_delta(context,
migration_ref)
downsize_reservations = self._reserve_quota_delta(context,
deltas)
self.compute_rpcapi.confirm_resize(context,
instance, migration_ref['id'],
host=src_host, cast=False)
host=src_host, cast=False,
reservations=downsize_reservations)

self.compute_rpcapi.terminate_instance(context, instance)

@@ -1343,12 +1356,16 @@ def revert_resize(self, context, instance):
raise exception.MigrationNotFoundByStatus(
instance_id=instance['uuid'], status='finished')

# reverse quota reservation for increased resource usage
deltas = self._reverse_upsize_quota_delta(context, migration_ref)
reservations = self._reserve_quota_delta(context, deltas)

instance = self.update(context, instance,
task_state=task_states.RESIZE_REVERTING)

self.compute_rpcapi.revert_resize(context,
instance=instance, migration_id=migration_ref['id'],
host=migration_ref['dest_compute'])
host=migration_ref['dest_compute'], reservations=reservations)

self.db.migration_update(context, migration_ref['id'],
{'status': 'reverted'})
@@ -1365,16 +1382,89 @@ def confirm_resize(self, context, instance):
raise exception.MigrationNotFoundByStatus(
instance_id=instance['uuid'], status='finished')

# reserve quota only for any decrease in resource usage
deltas = self._downsize_quota_delta(context, migration_ref)
reservations = self._reserve_quota_delta(context, deltas)

instance = self.update(context, instance, vm_state=vm_states.ACTIVE,
task_state=None)

self.compute_rpcapi.confirm_resize(context,
instance=instance, migration_id=migration_ref['id'],
host=migration_ref['source_compute'])
host=migration_ref['source_compute'],
reservations=reservations)

self.db.migration_update(context, migration_ref['id'],
{'status': 'confirmed'})

@staticmethod
def _resize_quota_delta(context, new_instance_type,
old_instance_type, sense, compare):
"""
Calculate any quota adjustment required at a particular point
in the resize cycle.
:param context: the request context
:param new_instance_type: the target instance type
:param old_instance_type: the original instance type
:param sense: the sense of the adjustment, 1 indicates a
forward adjustment, whereas -1 indicates a
reversal of a prior adjustment
:param compare: the direction of the comparison, 1 indicates
we're checking for positive deltas, whereas
-1 indicates negative deltas
"""
def _quota_delta(resource):
return sense * (new_instance_type[resource] -
old_instance_type[resource])

deltas = {}
if compare * _quota_delta('vcpus') > 0:
deltas['cores'] = _quota_delta('vcpus')
if compare * _quota_delta('memory_mb') > 0:
deltas['ram'] = _quota_delta('memory_mb')

return deltas

@staticmethod
def _upsize_quota_delta(context, new_instance_type, old_instance_type):
"""
Calculate deltas required to adjust quota for an instance upsize.
"""
return API._resize_quota_delta(context, new_instance_type,
old_instance_type, 1, 1)

@staticmethod
def _reverse_upsize_quota_delta(context, migration_ref):
"""
Calculate deltas required to reverse a prior upsizing
quota adjustment.
"""
old_instance_type = instance_types.get_instance_type(
migration_ref['old_instance_type_id'])
new_instance_type = instance_types.get_instance_type(
migration_ref['new_instance_type_id'])

return API._resize_quota_delta(context, new_instance_type,
old_instance_type, -1, -1)

@staticmethod
def _downsize_quota_delta(context, migration_ref):
"""
Calculate deltas required to adjust quota for an instance downsize.
"""
old_instance_type = instance_types.get_instance_type(
migration_ref['old_instance_type_id'])
new_instance_type = instance_types.get_instance_type(
migration_ref['new_instance_type_id'])

return API._resize_quota_delta(context, new_instance_type,
old_instance_type, 1, -1)

@staticmethod
def _reserve_quota_delta(context, deltas):
return QUOTAS.reserve(context, **deltas) if deltas else None

@wrap_check_policy
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
@@ -1424,6 +1514,33 @@ def resize(self, context, instance, flavor_id=None, **kwargs):
if (current_memory_mb == new_memory_mb) and flavor_id:
raise exception.CannotResizeToSameSize()

# ensure there is sufficient headroom for upsizes
deltas = self._upsize_quota_delta(context, new_instance_type,
current_instance_type)
try:
reservations = self._reserve_quota_delta(context, deltas)
except exception.OverQuota as exc:
quotas = exc.kwargs['quotas']
usages = exc.kwargs['usages']
overs = exc.kwargs['overs']

headroom = dict((res, quotas[res] -
(usages[res]['in_use'] + usages[res]['reserved']))
for res in quotas.keys())

resource = overs[0]
used = quotas[resource] - headroom[resource]
total_allowed = used + headroom[resource]
overs = ','.join(overs)

pid = context.project_id
LOG.warn(_("%(overs)s quota exceeded for %(pid)s,"
" tried to resize instance. %(msg)s"), locals())
raise exception.TooManyInstances(overs=overs,
req=deltas[resource],
used=used, allowed=total_allowed,
resource=resource)

instance = self.update(context, instance,
task_state=task_states.RESIZE_PREP, progress=0, **kwargs)

@@ -1443,6 +1560,7 @@ def resize(self, context, instance, flavor_id=None, **kwargs):
"image": image,
"request_spec": jsonutils.to_primitive(request_spec),
"filter_properties": filter_properties,
"reservations": reservations,
}
self.scheduler_rpcapi.prep_resize(context, **args)

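To make the sense/compare pair used by _resize_quota_delta above concrete, here is a standalone rework of the delta logic (illustration only, not part of the commit; the flavor values are made up):

# Standalone rework of _resize_quota_delta, for illustration only.
def resize_quota_delta(new_type, old_type, sense, compare):
    def delta(resource):
        return sense * (new_type[resource] - old_type[resource])
    deltas = {}
    if compare * delta('vcpus') > 0:
        deltas['cores'] = delta('vcpus')
    if compare * delta('memory_mb') > 0:
        deltas['ram'] = delta('memory_mb')
    return deltas

tiny = {'vcpus': 1, 'memory_mb': 512}      # hypothetical old flavor
large = {'vcpus': 4, 'memory_mb': 8192}    # hypothetical new flavor

# up-size, checked up front: reserve the increase
print(resize_quota_delta(large, tiny, sense=1, compare=1))    # {'cores': 3, 'ram': 7680}
# reverting that up-size: hand the increase back as negative deltas
print(resize_quota_delta(large, tiny, sense=-1, compare=-1))  # {'cores': -3, 'ram': -7680}
# down-size (large -> tiny), applied only when the resize is confirmed
print(resize_quota_delta(tiny, large, sense=1, compare=-1))   # {'cores': -3, 'ram': -7680}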
77 changes: 53 additions & 24 deletions nova/compute/manager.py
@@ -68,6 +68,7 @@
from nova.openstack.common import rpc
from nova.openstack.common.rpc import common as rpc_common
from nova.openstack.common import timeutils
from nova import quota
from nova.scheduler import rpcapi as scheduler_rpcapi
from nova import utils
from nova.virt import driver
@@ -146,6 +147,8 @@
FLAGS = flags.FLAGS
FLAGS.register_opts(compute_opts)

QUOTAS = quota.QUOTAS

LOG = logging.getLogger(__name__)


@@ -220,7 +223,7 @@ def _get_image_meta(context, image_ref):
class ComputeManager(manager.SchedulerDependentManager):
"""Manages the running instances from creation to destruction."""

RPC_API_VERSION = '1.41'
RPC_API_VERSION = '1.42'

def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@@ -1370,7 +1373,7 @@ def change_instance_metadata(self, context, diff, instance=None,
@checks_instance_lock
@wrap_instance_fault
def confirm_resize(self, context, migration_id, instance_uuid=None,
instance=None):
instance=None, reservations=None):
"""Destroys the source instance."""
migration_ref = self.db.migration_get(context, migration_id)
if not instance:
@@ -1380,23 +1383,27 @@ def confirm_resize(self, context, migration_id, instance_uuid=None,
self._notify_about_instance_usage(context, instance,
"resize.confirm.start")

# NOTE(tr3buchet): tear down networks on source host
self.network_api.setup_networks_on_host(context, instance,
migration_ref['source_compute'], teardown=True)
with self._error_out_instance_on_exception(context, instance['uuid'],
reservations):
# NOTE(tr3buchet): tear down networks on source host
self.network_api.setup_networks_on_host(context, instance,
migration_ref['source_compute'], teardown=True)

network_info = self._get_instance_nw_info(context, instance)
self.driver.confirm_migration(migration_ref, instance,
self._legacy_nw_info(network_info))
network_info = self._get_instance_nw_info(context, instance)
self.driver.confirm_migration(migration_ref, instance,
self._legacy_nw_info(network_info))

self._notify_about_instance_usage(
context, instance, "resize.confirm.end",
network_info=network_info)
self._notify_about_instance_usage(
context, instance, "resize.confirm.end",
network_info=network_info)

self._quota_commit(context, reservations)

@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def revert_resize(self, context, migration_id, instance=None,
instance_uuid=None):
instance_uuid=None, reservations=None):
"""Destroys the new instance on the destination machine.
Reverts the model changes, and powers on the old instance on the
@@ -1408,21 +1415,23 @@ def revert_resize(self, context, migration_id, instance=None,
instance = self.db.instance_get_by_uuid(context,
migration_ref.instance_uuid)

with self._error_out_instance_on_exception(context, instance['uuid']):
with self._error_out_instance_on_exception(context, instance['uuid'],
reservations):
# NOTE(tr3buchet): tear down networks on destination host
self.network_api.setup_networks_on_host(context, instance,
teardown=True)

network_info = self._get_instance_nw_info(context, instance)
self.driver.destroy(instance, self._legacy_nw_info(network_info))
self.compute_rpcapi.finish_revert_resize(context, instance,
migration_ref['id'], migration_ref['source_compute'])
migration_ref['id'], migration_ref['source_compute'],
reservations)

@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def finish_revert_resize(self, context, migration_id, instance_uuid=None,
instance=None):
instance=None, reservations=None):
"""Finishes the second half of reverting a resize.
Power back on the source instance and revert the resized attributes
@@ -1434,7 +1443,8 @@ def finish_revert_resize(self, context, migration_id, instance_uuid=None,
instance = self.db.instance_get_by_uuid(context,
migration_ref.instance_uuid)

with self._error_out_instance_on_exception(context, instance['uuid']):
with self._error_out_instance_on_exception(context, instance['uuid'],
reservations):
network_info = self._get_instance_nw_info(context, instance)

self._notify_about_instance_usage(
@@ -1466,11 +1476,24 @@ def finish_revert_resize(self, context, migration_id, instance_uuid=None,
self._notify_about_instance_usage(
context, instance, "resize.revert.end")

self._quota_commit(context, reservations)

@staticmethod
def _quota_commit(context, reservations):
if reservations:
QUOTAS.commit(context, reservations)

@staticmethod
def _quota_rollback(context, reservations):
if reservations:
QUOTAS.rollback(context, reservations)

@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def prep_resize(self, context, image, instance=None, instance_uuid=None,
instance_type=None, instance_type_id=None):
instance_type=None, instance_type_id=None,
reservations=None):
"""Initiates the process of moving a running instance to another host.
Possibly changes the RAM and disk size in the process.
@@ -1484,7 +1507,8 @@ def prep_resize(self, context, image, instance=None, instance_uuid=None,
if not instance_type:
instance_type = instance_types.get_instance_type(instance_type_id)

with self._error_out_instance_on_exception(context, instance['uuid']):
with self._error_out_instance_on_exception(context, instance['uuid'],
reservations):
compute_utils.notify_usage_exists(
context, instance, current_period=True)
self._notify_about_instance_usage(
@@ -1513,7 +1537,7 @@ def prep_resize(self, context, image, instance=None, instance_uuid=None,

LOG.audit(_('Migrating'), context=context, instance=instance)
self.compute_rpcapi.resize_instance(context, instance,
migration_ref['id'], image)
migration_ref['id'], image, reservations)

extra_usage_info = dict(
new_instance_type=instance_type['name'],
@@ -1527,14 +1551,15 @@ def prep_resize(self, context, image, instance=None, instance_uuid=None,
@checks_instance_lock
@wrap_instance_fault
def resize_instance(self, context, migration_id, image, instance=None,
instance_uuid=None):
instance_uuid=None, reservations=None):
"""Starts the migration of a running instance to another host."""
migration_ref = self.db.migration_get(context, migration_id)
if not instance:
instance = self.db.instance_get_by_uuid(context,
migration_ref.instance_uuid)

with self._error_out_instance_on_exception(context, instance['uuid']):
with self._error_out_instance_on_exception(context, instance['uuid'],
reservations):
instance_type_ref = self.db.instance_type_get(context,
migration_ref.new_instance_type_id)

@@ -1562,7 +1587,7 @@ def resize_instance(self, context, migration_id, image, instance=None,
task_state=task_states.RESIZE_MIGRATED)

self.compute_rpcapi.finish_resize(context, instance, migration_id,
image, disk_info, migration_ref['dest_compute'])
image, disk_info, migration_ref['dest_compute'], reservations)

self._notify_about_instance_usage(context, instance, "resize.end",
network_info=network_info)
@@ -1621,7 +1646,7 @@ def _finish_resize(self, context, instance, migration_ref, disk_info,
@checks_instance_lock
@wrap_instance_fault
def finish_resize(self, context, migration_id, disk_info, image,
instance_uuid=None, instance=None):
instance_uuid=None, instance=None, reservations=None):
"""Completes the migration process.
Sets up the newly transferred disk and turns on the instance at its
@@ -1636,7 +1661,9 @@ def finish_resize(self, context, migration_id, disk_info, image,
try:
self._finish_resize(context, instance, migration_ref,
disk_info, image)
self._quota_commit(context, reservations)
except Exception, error:
self._quota_rollback(context, reservations)
with excutils.save_and_reraise_exception():
LOG.error(_('%s. Setting instance vm_state to ERROR') % error,
instance=instance)
@@ -2882,10 +2909,12 @@ def deleted_instance(instance):
return [i for i in instances if deleted_instance(i)]

@contextlib.contextmanager
def _error_out_instance_on_exception(self, context, instance_uuid):
def _error_out_instance_on_exception(self, context, instance_uuid,
reservations=None):
try:
yield
except Exception, error:
self._quota_rollback(context, reservations)
with excutils.save_and_reraise_exception():
msg = _('%s. Setting instance vm_state to ERROR')
LOG.error(msg % error, instance_uuid=instance_uuid)
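The discipline running through the manager changes above — roll back the reservation on any error, commit it only when the final step succeeds — boils down to a pattern like the following sketch (illustrative only; quotas stands in for nova.quota.QUOTAS and finish_step for the driver work):

import contextlib

@contextlib.contextmanager
def rollback_reservations_on_error(context, quotas, reservations):
    # Plays the quota role of _error_out_instance_on_exception: any
    # exception escaping the block returns the reserved headroom.
    try:
        yield
    except Exception:
        if reservations:
            quotas.rollback(context, reservations)
        raise

def finalize(context, quotas, reservations, finish_step):
    with rollback_reservations_on_error(context, quotas, reservations):
        finish_step()
    # Only the actor that completes the operation commits the usage change.
    if reservations:
        quotas.commit(context, reservations)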
