Skip to content

Commit

Permalink
Add support for resizes to resource tracker.
Browse files Browse the repository at this point in the history
Keep track of additional resources required to resize an instance
to a new host.  Also hold resources for a revert resize to the original
host.

This fixes race conditions where the destination host (or the source
host, in the event of a revert) could become overscheduled.

bug 1065267

Change-Id: Ic565d4e2ab9bee40f25fe9f198e1217cdd92ca1b
  • Loading branch information
Brian Elliott committed Nov 14, 2012
1 parent 16266a4 commit 5bc0ff6
Show file tree
Hide file tree
Showing 7 changed files with 675 additions and 103 deletions.
12 changes: 6 additions & 6 deletions nova/compute/api.py
Expand Up @@ -1564,13 +1564,13 @@ def revert_resize(self, context, instance):
task_state=task_states.RESIZE_REVERTING,
expected_task_state=None)

self.db.migration_update(elevated, migration_ref['id'],
{'status': 'reverting'})

self.compute_rpcapi.revert_resize(context,
instance=instance, migration=migration_ref,
host=migration_ref['dest_compute'], reservations=reservations)

self.db.migration_update(elevated, migration_ref['id'],
{'status': 'reverted'})

@wrap_check_policy
@check_instance_lock
@check_instance_state(vm_state=[vm_states.RESIZED])
Expand All @@ -1588,14 +1588,14 @@ def confirm_resize(self, context, instance):
task_state=None,
expected_task_state=None)

self.db.migration_update(elevated, migration_ref['id'],
{'status': 'confirming'})

self.compute_rpcapi.confirm_resize(context,
instance=instance, migration=migration_ref,
host=migration_ref['source_compute'],
reservations=reservations)

self.db.migration_update(elevated, migration_ref['id'],
{'status': 'confirmed'})

@staticmethod
def _resize_quota_delta(context, new_instance_type,
old_instance_type, sense, compare):
Expand Down
35 changes: 35 additions & 0 deletions nova/compute/claims.py
Expand Up @@ -29,6 +29,9 @@
class NopClaim(object):
"""For use with compute drivers that do not support resource tracking"""

def __init__(self, migration=None):
self.migration = migration

@property
def disk_gb(self):
return 0
Expand Down Expand Up @@ -184,3 +187,35 @@ def _test(self, type_, unit, total, used, requested, limit):
LOG.info(msg, instance=self.instance)

return can_claim


class ResizeClaim(Claim):
    """Claim that holds resources for an incoming resize/migration.

    The claimed amounts (disk, memory, vcpus) are taken from the
    ``instance_type`` handed to the constructor.
    """

    def __init__(self, instance, instance_type, tracker):
        super(ResizeClaim, self).__init__(instance, tracker)
        # Nothing in this class assigns a migration record; the attribute
        # starts out empty and is presumably filled in by the tracker.
        self.migration = None
        self.instance_type = instance_type

    @property
    def disk_gb(self):
        """Root plus ephemeral disk size of the instance type."""
        flavor = self.instance_type
        return flavor['root_gb'] + flavor['ephemeral_gb']

    @property
    def memory_mb(self):
        """Memory size of the instance type."""
        flavor = self.instance_type
        return flavor['memory_mb']

    @property
    def vcpus(self):
        """Number of vcpus of the instance type."""
        flavor = self.instance_type
        return flavor['vcpus']

    @lockutils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, 'nova-')
    def abort(self):
        """Give up this claim because the compute operation that needed the
        resources has failed or been aborted.
        """
        instance = self.instance
        LOG.debug(_("Aborting claim: %s") % self, instance=instance)
        # Hand the release over to the tracker, identified by the instance
        # uuid together with the instance type that was being claimed.
        self.tracker.abort_resize_claim(instance['uuid'], self.instance_type)
58 changes: 33 additions & 25 deletions nova/compute/manager.py
Expand Up @@ -1593,6 +1593,9 @@ def confirm_resize(self, context, instance, reservations=None,
self.driver.confirm_migration(migration, instance,
self._legacy_nw_info(network_info))

rt = self._get_resource_tracker(instance.get('node'))
rt.confirm_resize(context, migration)

self._notify_about_instance_usage(
context, instance, "resize.confirm.end",
network_info=network_info)
Expand Down Expand Up @@ -1637,6 +1640,9 @@ def revert_resize(self, context, instance, migration=None,

self._terminate_volume_connections(context, instance)

rt = self._get_resource_tracker(instance.get('node'))
rt.revert_resize(context, migration, status='reverted_dest')

self.compute_rpcapi.finish_revert_resize(context, instance,
migration, migration['source_compute'],
reservations)
Expand Down Expand Up @@ -1707,8 +1713,8 @@ def finish_revert_resize(self, context, instance, reservations=None,
vm_state=vm_states.ACTIVE,
task_state=None)

self.db.migration_update(elevated, migration['id'],
{'status': 'reverted'})
rt = self._get_resource_tracker(instance.get('node'))
rt.revert_resize(context, migration)

self._notify_about_instance_usage(
context, instance, "resize.revert.end")
Expand All @@ -1725,6 +1731,29 @@ def _quota_rollback(context, reservations):
if reservations:
QUOTAS.rollback(context, reservations)

def _prep_resize(self, context, image, instance, instance_type,
                 reservations, request_spec, filter_properties):
    """Claim resources on this host for a resize and start the migration.

    A resize claim is taken from the resource tracker of the instance's
    node, using the 'limits' entry of ``filter_properties`` (empty when
    absent), and the claim's migration record is passed on to
    ``compute_rpcapi.resize_instance``.

    Raises exception.MigrationError, after putting the instance into the
    error state, when the instance already lives on this host and
    CONF.allow_resize_to_same_host is not set.

    ``request_spec`` is accepted but not used in this method.
    """
    # Callers may pass None (or an empty value) for the filter properties.
    if not filter_properties:
        filter_properties = {}

    if instance['host'] == self.host:
        # The config option is only consulted for a same-host resize.
        if not CONF.allow_resize_to_same_host:
            self._set_instance_error_state(context, instance['uuid'])
            raise exception.MigrationError(_('destination same as source!'))

    claim_limits = filter_properties.get('limits', {})
    resource_tracker = self._get_resource_tracker(instance.get('node'))
    pending_claim = resource_tracker.resize_claim(context, instance,
                                                  instance_type,
                                                  limits=claim_limits)

    # NOTE(review): the audit log and the RPC call are kept inside the
    # claim's ``with`` block here -- confirm the nesting against the
    # repository version of this method.
    with pending_claim as claim:
        migration = claim.migration

        LOG.audit(_('Migrating'), context=context, instance=instance)
        self.compute_rpcapi.resize_instance(context, instance, migration,
                                            image, instance_type,
                                            reservations)

@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@reverts_task_state
@wrap_instance_fault
Expand All @@ -1742,30 +1771,9 @@ def prep_resize(self, context, image, instance, instance_type,
context, instance, current_period=True)
self._notify_about_instance_usage(
context, instance, "resize.prep.start")

try:
same_host = instance['host'] == self.host
if same_host and not CONF.allow_resize_to_same_host:
self._set_instance_error_state(context, instance['uuid'])
msg = _('destination same as source!')
raise exception.MigrationError(msg)

old_instance_type = instance['instance_type']

migration_ref = self.db.migration_create(context.elevated(),
{'instance_uuid': instance['uuid'],
'source_compute': instance['host'],
'dest_compute': self.host,
'dest_host': self.driver.get_host_ip_addr(),
'old_instance_type_id': old_instance_type['id'],
'new_instance_type_id': instance_type['id'],
'status': 'pre-migrating'})

LOG.audit(_('Migrating'), context=context,
instance=instance)
self.compute_rpcapi.resize_instance(context, instance,
migration_ref, image, instance_type, reservations)

self._prep_resize(context, image, instance, instance_type,
reservations, request_spec, filter_properties)
except Exception:
# try to re-schedule the resize elsewhere:
self._reschedule_resize_or_reraise(context, image, instance,
Expand Down

0 comments on commit 5bc0ff6

Please sign in to comment.