Skip to content

Commit

Permalink
Vm state management and error states
Browse files Browse the repository at this point in the history
this implements the blueprint nova-vm-state-management
It implements the following functionalities:
- Filter compute api calls according to state of the VM
(defined in compute/state_checker).
- Sets error state if the scheduler cannot allocate the VM in any host
- Handles the create/delete concurrency in the compute manager

Change-Id: Ie6d016b7d4781f70bb5967f204fa88a6412bd727
  • Loading branch information
David Subiros authored and vishvananda committed Dec 13, 2011
1 parent d3b75b7 commit ff753cd
Show file tree
Hide file tree
Showing 11 changed files with 533 additions and 100 deletions.
118 changes: 61 additions & 57 deletions nova/compute/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from nova import volume
from nova.compute import instance_types
from nova.compute import power_state
from nova.compute import state_checker
from nova.compute import task_states
from nova.compute import vm_states
from nova.scheduler import api as scheduler_api
Expand All @@ -49,25 +50,8 @@
flags.DECLARE('vncproxy_topic', 'nova.vnc')
flags.DEFINE_integer('find_host_timeout', 30,
'Timeout after NN seconds when looking for a host.')


def _is_able_to_shutdown(instance):
vm_state = instance["vm_state"]
instance_uuid = instance["uuid"]

valid_shutdown_states = [
vm_states.ACTIVE,
vm_states.REBUILDING,
vm_states.BUILDING,
vm_states.ERROR,
]

if vm_state not in valid_shutdown_states:
LOG.warn(_("Instance %(instance_uuid)s cannot be shutdown from "
"its current state: %(vm_state)s.") % locals())
return False

return True
flags.DEFINE_boolean('api_check_vm_states', True,
'Filter calls by vm state')


def _is_queued_delete(instance):
Expand All @@ -83,6 +67,27 @@ def _is_queued_delete(instance):
return True


class check_vm_state(object):
"""Class to wrap API functions that are sensitive to the VM state.
If the instance is in the wrong state, the wrapper will raise an exception.
It uses state_checker to decide if the call is allowed or not.
"""

def __init__(self, method_name):
self.method_name = method_name

def __call__(self, f):
def _state_checker_wrap(api, context, instance, *args, **kw):
if FLAGS.api_check_vm_states and \
state_checker.is_blocked(self.method_name, context, instance):
raise exception.InstanceInvalidState(\
instance_uuid=instance['uuid'], method=self.method_name)
else:
return f(api, context, instance, *args, **kw)
return _state_checker_wrap


class API(base.Base):
"""API for interacting with the compute manager."""

Expand Down Expand Up @@ -766,15 +771,13 @@ def update(self, context, instance, **kwargs):
rv = self.db.instance_update(context, instance["id"], kwargs)
return dict(rv.iteritems())

@scheduler_api.reroute_compute("soft_delete")
@check_vm_state(state_checker.SOFT_DELETE)
@scheduler_api.reroute_compute(state_checker.SOFT_DELETE)
def soft_delete(self, context, instance):
"""Terminate an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to soft delete %s"), instance_uuid)

if not _is_able_to_shutdown(instance):
return

# NOTE(jerdfelt): The compute daemon handles reclaiming instances
# that are in soft delete. If there is no host assigned, there is
# no daemon to reclaim, so delete it immediately.
Expand Down Expand Up @@ -806,20 +809,18 @@ def _delete(self, context, instance):
else:
self.db.instance_destroy(context, instance['id'])

@scheduler_api.reroute_compute("delete")
@check_vm_state(state_checker.DELETE)
@scheduler_api.reroute_compute(state_checker.DELETE)
def delete(self, context, instance):
"""Terminate an instance."""
LOG.debug(_("Going to try to terminate %s"), instance["uuid"])

if not _is_able_to_shutdown(instance):
return

self._delete(context, instance)

@scheduler_api.reroute_compute("restore")
@check_vm_state(state_checker.RESTORE)
@scheduler_api.reroute_compute(state_checker.RESTORE)
def restore(self, context, instance):
"""Restore a previously deleted (but not reclaimed) instance."""

if not _is_queued_delete(instance):
return

Expand All @@ -837,7 +838,8 @@ def restore(self, context, instance):
self._cast_compute_message('power_on_instance', context,
instance['uuid'], host)

@scheduler_api.reroute_compute("force_delete")
@check_vm_state(state_checker.FORCE_DELETE)
@scheduler_api.reroute_compute(state_checker.FORCE_DELETE)
def force_delete(self, context, instance):
"""Force delete a previously deleted (but not reclaimed) instance."""

Expand All @@ -846,15 +848,13 @@ def force_delete(self, context, instance):

self._delete(context, instance)

@scheduler_api.reroute_compute("stop")
@check_vm_state(state_checker.STOP)
@scheduler_api.reroute_compute(state_checker.STOP)
def stop(self, context, instance):
"""Stop an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to stop %s"), instance_uuid)

if not _is_able_to_shutdown(instance):
return

self.update(context,
instance,
vm_state=vm_states.ACTIVE,
Expand All @@ -867,6 +867,7 @@ def stop(self, context, instance):
self._cast_compute_message('stop_instance', context,
instance_uuid, host)

@check_vm_state(state_checker.START)
def start(self, context, instance):
"""Start an instance."""
vm_state = instance["vm_state"]
Expand Down Expand Up @@ -1078,7 +1079,8 @@ def _find_host(self, context, instance_uuid):
raise exception.Error(_("Unable to find host for Instance %s")
% instance_uuid)

@scheduler_api.reroute_compute("backup")
@check_vm_state(state_checker.BACKUP)
@scheduler_api.reroute_compute(state_checker.BACKUP)
def backup(self, context, instance, name, backup_type, rotation,
extra_properties=None):
"""Backup the given instance
Expand All @@ -1095,7 +1097,8 @@ def backup(self, context, instance, name, backup_type, rotation,
extra_properties=extra_properties)
return recv_meta

@scheduler_api.reroute_compute("snapshot")
@check_vm_state(state_checker.SNAPSHOT)
@scheduler_api.reroute_compute(state_checker.SNAPSHOT)
def snapshot(self, context, instance, name, extra_properties=None):
"""Snapshot the given instance.
Expand Down Expand Up @@ -1125,12 +1128,6 @@ def _create_image(self, context, instance, name, image_type,
task_state = instance["task_state"]
instance_uuid = instance['uuid']

if task_state == task_states.IMAGE_BACKUP:
raise exception.InstanceBackingUp(instance_uuid=instance_uuid)

if task_state == task_states.IMAGE_SNAPSHOT:
raise exception.InstanceSnapshotting(instance_uuid=instance_uuid)

properties = {
'instance_uuid': instance_uuid,
'user_id': str(context.user_id),
Expand All @@ -1150,7 +1147,8 @@ def _create_image(self, context, instance, name, image_type,
params=params)
return recv_meta

@scheduler_api.reroute_compute("reboot")
@check_vm_state(state_checker.REBOOT)
@scheduler_api.reroute_compute(state_checker.REBOOT)
def reboot(self, context, instance, reboot_type):
"""Reboot the given instance."""
state = {'SOFT': task_states.REBOOTING,
Expand All @@ -1164,16 +1162,13 @@ def reboot(self, context, instance, reboot_type):
instance['uuid'],
params={'reboot_type': reboot_type})

@scheduler_api.reroute_compute("rebuild")
@check_vm_state(state_checker.REBUILD)
@scheduler_api.reroute_compute(state_checker.REBUILD)
def rebuild(self, context, instance, image_href, admin_password,
name=None, metadata=None, files_to_inject=None):
"""Rebuild the given instance with the provided metadata."""
name = name or instance["display_name"]

if instance["vm_state"] != vm_states.ACTIVE:
msg = _("Instance must be active to rebuild.")
raise exception.RebuildRequiresActiveInstance(msg)

files_to_inject = files_to_inject or []
metadata = metadata or {}

Expand All @@ -1199,7 +1194,8 @@ def rebuild(self, context, instance, image_href, admin_password,
instance["uuid"],
params=rebuild_params)

@scheduler_api.reroute_compute("revert_resize")
@check_vm_state(state_checker.REVERT_RESIZE)
@scheduler_api.reroute_compute(state_checker.REVERT_RESIZE)
def revert_resize(self, context, instance):
"""Reverts a resize, deleting the 'new' instance in the process."""
context = context.elevated()
Expand All @@ -1223,7 +1219,8 @@ def revert_resize(self, context, instance):
self.db.migration_update(context, migration_ref['id'],
{'status': 'reverted'})

@scheduler_api.reroute_compute("confirm_resize")
@check_vm_state(state_checker.CONFIRM_RESIZE)
@scheduler_api.reroute_compute(state_checker.CONFIRM_RESIZE)
def confirm_resize(self, context, instance):
"""Confirms a migration/resize and deletes the 'old' instance."""
context = context.elevated()
Expand All @@ -1249,7 +1246,8 @@ def confirm_resize(self, context, instance):
self.db.instance_update(context, instance['uuid'],
{'host': migration_ref['dest_compute'], })

@scheduler_api.reroute_compute("resize")
@check_vm_state(state_checker.RESIZE)
@scheduler_api.reroute_compute(state_checker.RESIZE)
def resize(self, context, instance, flavor_id=None):
"""Resize (ie, migrate) a running instance.
Expand Down Expand Up @@ -1330,7 +1328,8 @@ def add_network_to_project(self, context, project_id):
# didn't raise so this is the correct zone
self.network_api.add_network_to_project(context, project_id)

@scheduler_api.reroute_compute("pause")
@check_vm_state(state_checker.PAUSE)
@scheduler_api.reroute_compute(state_checker.PAUSE)
def pause(self, context, instance):
"""Pause the given instance."""
instance_uuid = instance["uuid"]
Expand All @@ -1340,7 +1339,8 @@ def pause(self, context, instance):
task_state=task_states.PAUSING)
self._cast_compute_message('pause_instance', context, instance_uuid)

@scheduler_api.reroute_compute("unpause")
@check_vm_state(state_checker.UNPAUSE)
@scheduler_api.reroute_compute(state_checker.UNPAUSE)
def unpause(self, context, instance):
"""Unpause the given instance."""
instance_uuid = instance["uuid"]
Expand Down Expand Up @@ -1377,7 +1377,8 @@ def get_actions(self, context, instance):
"""Retrieve actions for the given instance."""
return self.db.instance_get_actions(context, instance['id'])

@scheduler_api.reroute_compute("suspend")
@check_vm_state(state_checker.SUSPEND)
@scheduler_api.reroute_compute(state_checker.SUSPEND)
def suspend(self, context, instance):
"""Suspend the given instance."""
instance_uuid = instance["uuid"]
Expand All @@ -1387,7 +1388,8 @@ def suspend(self, context, instance):
task_state=task_states.SUSPENDING)
self._cast_compute_message('suspend_instance', context, instance_uuid)

@scheduler_api.reroute_compute("resume")
@check_vm_state(state_checker.RESUME)
@scheduler_api.reroute_compute(state_checker.RESUME)
def resume(self, context, instance):
"""Resume the given instance."""
instance_uuid = instance["uuid"]
Expand All @@ -1397,7 +1399,8 @@ def resume(self, context, instance):
task_state=task_states.RESUMING)
self._cast_compute_message('resume_instance', context, instance_uuid)

@scheduler_api.reroute_compute("rescue")
@check_vm_state(state_checker.RESCUE)
@scheduler_api.reroute_compute(state_checker.RESCUE)
def rescue(self, context, instance, rescue_password=None):
"""Rescue the given instance."""
self.update(context,
Expand All @@ -1412,7 +1415,8 @@ def rescue(self, context, instance, rescue_password=None):
instance['uuid'],
params=rescue_params)

@scheduler_api.reroute_compute("unrescue")
@check_vm_state(state_checker.UNRESCUE)
@scheduler_api.reroute_compute(state_checker.UNRESCUE)
def unrescue(self, context, instance):
"""Unrescue the given instance."""
self.update(context,
Expand Down
35 changes: 34 additions & 1 deletion nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,34 @@ def _setup_block_device_mapping(self, context, instance):

return (swap, ephemerals, block_device_mapping)

def _is_instance_terminated(self, instance_uuid):
"""Instance in DELETING task state or not found in DB"""
context = nova.context.get_admin_context()
try:
instance = self.db.instance_get_by_uuid(context, instance_uuid)
if instance['task_state'] == task_states.DELETING:
return True
return False
except:
return True

def _shutdown_instance_even_if_deleted(self, context, instance_uuid):
"""Call terminate_instance even for already deleted instances"""
LOG.info(_("Going to force the deletion of the vm %(instance_uuid)s, "
"even if it is deleted") % locals())
try:
try:
self.terminate_instance(context, instance_uuid)
except exception.InstanceNotFound:
LOG.info(_("Instance %(instance_uuid)s did not exist in the "
"DB, but I will shut it down anyway using a special "
"context") % locals())
ctxt = nova.context.get_admin_context(True)
self.terminate_instance(ctxt, instance_uuid)
except Exception as ex:
LOG.info(_("exception terminating the instance "
"%(instance_id)s") % locals())

def _run_instance(self, context, instance_uuid,
requested_networks=None,
injected_files=[],
Expand All @@ -320,9 +348,14 @@ def _run_instance(self, context, instance_uuid,
with utils.save_and_reraise_exception():
self._deallocate_network(context, instance)
self._notify_about_instance_usage(instance)
if self._is_instance_terminated(instance_uuid):
raise exception.InstanceNotFound
except exception.InstanceNotFound:
LOG.exception(_("Instance %s not found.") % instance_uuid)
return # assuming the instance was already deleted
# assuming the instance was already deleted, run "delete" again
# just in case
self._shutdown_instance_even_if_deleted(context, instance_uuid)
return
except Exception as e:
with utils.save_and_reraise_exception():
self._instance_update(context, instance_uuid,
Expand Down
Loading

0 comments on commit ff753cd

Please sign in to comment.