Skip to content

Commit

Permalink
compute: enhance compute evacuate instance to support target state
Browse files Browse the repository at this point in the history
Related to the blueprint bp/allowing-target-state-for-evacuate. This change
extends the compute API to accept a new argument, targetState.

When set, the targetState argument forces the state that the evacuated
instance will have on the destination host.

Signed-off-by: Sahid Orentino Ferdjaoui <sahid.ferdjaoui@industrialdiscipline.com>
Change-Id: I9660d42937ad62d647afc6be965f166cc5631392
  • Loading branch information
sahid committed Jan 31, 2023
1 parent 23c5f3d commit 8c2e765
Show file tree
Hide file tree
Showing 15 changed files with 201 additions and 65 deletions.
8 changes: 5 additions & 3 deletions nova/compute/api.py
Expand Up @@ -3797,7 +3797,8 @@ def _reset_image_metadata():
orig_sys_metadata=orig_sys_metadata, bdms=bdms,
preserve_ephemeral=preserve_ephemeral, host=host,
request_spec=request_spec,
reimage_boot_volume=reimage_boot_volume)
reimage_boot_volume=reimage_boot_volume,
target_state=None)

def _check_volume_status(self, context, bdms):
"""Check whether the status of the volume is "in-use".
Expand Down Expand Up @@ -5617,7 +5618,7 @@ def live_migrate_abort(self, context, instance, migration_id,
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.ERROR], task_state=None)
def evacuate(self, context, instance, host, on_shared_storage,
admin_password=None, force=None):
admin_password=None, force=None, target_state=None):
"""Running evacuate to target host.
Checking vm compute host state, if the host not in expected_state,
Expand All @@ -5628,6 +5629,7 @@ def evacuate(self, context, instance, host, on_shared_storage,
:param on_shared_storage: True if instance files on shared storage
:param admin_password: password to set on rebuilt instance
:param force: Force the evacuation to the specific host target
:param target_state: Set a target state for the evacuated instance
"""
LOG.debug('vm evacuation scheduled', instance=instance)
Expand Down Expand Up @@ -5691,7 +5693,7 @@ def evacuate(self, context, instance, host, on_shared_storage,
on_shared_storage=on_shared_storage,
host=host,
request_spec=request_spec,
)
target_state=target_state)

def get_migrations(self, context, filters):
"""Get all migrations for the given filters."""
Expand Down
25 changes: 18 additions & 7 deletions nova/compute/manager.py
Expand Up @@ -618,7 +618,7 @@ def update_compute_provider_status(self, context, rp_uuid, enabled):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""

target = messaging.Target(version='6.1')
target = messaging.Target(version='6.2')

def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
Expand Down Expand Up @@ -3674,7 +3674,7 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec, accel_uuids,
reimage_boot_volume):
reimage_boot_volume, target_state):
"""Destroy and re-make this instance.

A 'rebuild' effectively purges all existing data from the system and
Expand Down Expand Up @@ -3709,6 +3709,7 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
:param reimage_boot_volume: Boolean to specify whether the user has
explicitly requested to rebuild a boot
volume
:param target_state: Set a target state for the evacuated instance.

"""
# recreate=True means the instance is being evacuated from a failed
Expand Down Expand Up @@ -3773,7 +3774,8 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
image_meta, injected_files, new_pass, orig_sys_metadata,
bdms, evacuate, on_shared_storage, preserve_ephemeral,
migration, request_spec, allocs, rebuild_claim,
scheduled_node, limits, accel_uuids, reimage_boot_volume)
scheduled_node, limits, accel_uuids, reimage_boot_volume,
target_state)
except (exception.ComputeResourcesUnavailable,
exception.RescheduledException) as e:
if isinstance(e, exception.ComputeResourcesUnavailable):
Expand Down Expand Up @@ -3833,7 +3835,7 @@ def _do_rebuild_instance_with_claim(
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, rebuild_claim, scheduled_node, limits, accel_uuids,
reimage_boot_volume):
reimage_boot_volume, target_state):
"""Helper to avoid deep nesting in the top-level method."""

provider_mapping = None
Expand All @@ -3857,7 +3859,8 @@ def _do_rebuild_instance_with_claim(
context, instance, orig_image_ref, image_meta, injected_files,
new_pass, orig_sys_metadata, bdms, evacuate, on_shared_storage,
preserve_ephemeral, migration, request_spec, allocations,
provider_mapping, accel_uuids, reimage_boot_volume)
provider_mapping, accel_uuids, reimage_boot_volume,
target_state)

@staticmethod
def _get_image_name(image_meta):
Expand All @@ -3871,10 +3874,18 @@ def _do_rebuild_instance(
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, request_group_resource_providers_mapping,
accel_uuids, reimage_boot_volume):
accel_uuids, reimage_boot_volume, target_state):
orig_vm_state = instance.vm_state

if evacuate:
if target_state and orig_vm_state != vm_states.ERROR:
# This will ensure that at destination the instance will have
# the desired state.
if target_state not in vm_states.ALLOW_TARGET_STATES:
raise exception.InstanceEvacuateNotSupportedTargetState(
target_state=target_state)
orig_vm_state = target_state

if request_spec:
# NOTE(gibi): Do a late check of server group policy as
# parallel scheduling could violate such policy. This will
Expand Down Expand Up @@ -11347,7 +11358,7 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec,
accel_uuids, False)
accel_uuids, False, None)

# 5.13 support for optional accel_uuids argument
def shelve_instance(self, context, instance, image_id,
Expand Down
18 changes: 14 additions & 4 deletions nova/compute/rpcapi.py
Expand Up @@ -403,6 +403,7 @@ class ComputeAPI(object):
* ... - Rename the instance_type argument of resize_instance() to
flavor
* 6.1 - Add reimage_boot_volume parameter to rebuild_instance()
* 6.2 - Add target_state parameter to rebuild_instance()
'''

VERSION_ALIASES = {
Expand All @@ -424,6 +425,7 @@ class ComputeAPI(object):
'xena': '6.0',
'yoga': '6.0',
'zed': '6.1',
'antilope': '6.2',
}

@property
Expand Down Expand Up @@ -1083,7 +1085,7 @@ def rebuild_instance(
image_ref, orig_image_ref, orig_sys_metadata, bdms,
recreate, on_shared_storage, host, node,
preserve_ephemeral, migration, limits, request_spec, accel_uuids,
reimage_boot_volume):
reimage_boot_volume, target_state):

# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
Expand All @@ -1096,11 +1098,19 @@ def rebuild_instance(
'limits': limits,
'request_spec': request_spec,
'accel_uuids': accel_uuids,
'reimage_boot_volume': reimage_boot_volume
'reimage_boot_volume': reimage_boot_volume,
'target_state': target_state,
}

version = '6.1'
version = '6.2'
client = self.router.client(ctxt)
if not client.can_send_version(version):
if msg_args['target_state']:
raise exception.UnsupportedRPCVersion(
api="rebuild_instance",
required="6.2")
else:
del msg_args['target_state']
version = '6.1'
if not client.can_send_version(version):
if msg_args['reimage_boot_volume']:
raise exception.NovaException(
Expand Down
3 changes: 3 additions & 0 deletions nova/compute/vm_states.py
Expand Up @@ -76,3 +76,6 @@

# states we allow resources to be freed in
ALLOW_RESOURCE_REMOVAL = [DELETED, SHELVED_OFFLOADED]

# target states that may be requested when evacuating an instance
ALLOW_TARGET_STATES = [STOPPED]
6 changes: 4 additions & 2 deletions nova/conductor/api.py
Expand Up @@ -144,7 +144,8 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
injected_files, new_pass, orig_sys_metadata,
bdms, recreate=False, on_shared_storage=False,
preserve_ephemeral=False, host=None,
request_spec=None, reimage_boot_volume=False):
request_spec=None, reimage_boot_volume=False,
target_state=None):
self.conductor_compute_rpcapi.rebuild_instance(context,
instance=instance,
new_pass=new_pass,
Expand All @@ -158,7 +159,8 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
preserve_ephemeral=preserve_ephemeral,
host=host,
request_spec=request_spec,
reimage_boot_volume=reimage_boot_volume)
reimage_boot_volume=reimage_boot_volume,
target_state=target_state)

def cache_images(self, context, aggregate, image_ids):
"""Request images be pre-cached on hosts within an aggregate.
Expand Down
8 changes: 5 additions & 3 deletions nova/conductor/manager.py
Expand Up @@ -235,7 +235,7 @@ class ComputeTaskManager:
may involve coordinating activities on multiple compute nodes.
"""

target = messaging.Target(namespace='compute_task', version='1.24')
target = messaging.Target(namespace='compute_task', version='1.25')

def __init__(self):
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
Expand Down Expand Up @@ -1152,7 +1152,8 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral=False, host=None,
request_spec=None, reimage_boot_volume=False):
request_spec=None, reimage_boot_volume=False,
target_state=None):
# recreate=True means the instance is being evacuated from a failed
# host to a new destination host. The 'recreate' variable name is
# confusing, so rename it to evacuate here at the top, which is simpler
Expand Down Expand Up @@ -1356,7 +1357,8 @@ def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
limits=limits,
request_spec=request_spec,
accel_uuids=accel_uuids,
reimage_boot_volume=reimage_boot_volume)
reimage_boot_volume=reimage_boot_volume,
target_state=target_state)

def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied
Expand Down
15 changes: 12 additions & 3 deletions nova/conductor/rpcapi.py
Expand Up @@ -287,6 +287,7 @@ class ComputeTaskAPI(object):
1.22 - Added confirm_snapshot_based_resize()
1.23 - Added revert_snapshot_based_resize()
1.24 - Add reimage_boot_volume parameter to rebuild_instance()
1.25 - Add target_state parameter to rebuild_instance()
"""

def __init__(self):
Expand Down Expand Up @@ -428,8 +429,8 @@ def rebuild_instance(self, ctxt, instance, new_pass, injected_files,
image_ref, orig_image_ref, orig_sys_metadata, bdms,
recreate=False, on_shared_storage=False, host=None,
preserve_ephemeral=False, request_spec=None,
reimage_boot_volume=False):
version = '1.24'
reimage_boot_volume=False, target_state=None):
version = '1.25'
kw = {'instance': instance,
'new_pass': new_pass,
'injected_files': injected_files,
Expand All @@ -442,8 +443,16 @@ def rebuild_instance(self, ctxt, instance, new_pass, injected_files,
'preserve_ephemeral': preserve_ephemeral,
'host': host,
'request_spec': request_spec,
'reimage_boot_volume': reimage_boot_volume
'reimage_boot_volume': reimage_boot_volume,
'target_state': target_state,
}
if not self.client.can_send_version(version):
if kw['target_state']:
raise exception.UnsupportedRPCVersion(
api="rebuild_instance", required="1.25")
else:
del kw['target_state']
version = '1.24'
if not self.client.can_send_version(version):
if kw['reimage_boot_volume']:
raise exception.NovaException(
Expand Down
10 changes: 10 additions & 0 deletions nova/exception.py
Expand Up @@ -1451,6 +1451,11 @@ class InstanceEvacuateNotSupported(Invalid):
msg_fmt = _('Instance evacuate is not supported.')


# Raised during an evacuate rebuild when the requested target state is not
# one of vm_states.ALLOW_TARGET_STATES. The message is formatted with a
# 'target_state' keyword argument naming the rejected state.
class InstanceEvacuateNotSupportedTargetState(Invalid):
msg_fmt = _("Target state '%(target_state)s' for instance evacuate "
"is not supported.")


class DBNotAllowed(NovaException):
msg_fmt = _('%(binary)s attempted direct database access which is '
'not allowed by policy')
Expand Down Expand Up @@ -1479,6 +1484,11 @@ class UnsupportedRescueImage(Invalid):
msg_fmt = _("Requested rescue image '%(image)s' is not supported")


# Raised by the RPC client wrappers when the peer cannot be sent the RPC
# version that a requested argument depends on (e.g. rebuild_instance() with
# a target_state set). The message is formatted with 'api' (the RPC method
# name) and 'required' (the minimum RPC version needed).
class UnsupportedRPCVersion(Invalid):
msg_fmt = _("Unsupported RPC version for %(api)s. "
"Required >= %(required)s")


class Base64Exception(NovaException):
msg_fmt = _("Invalid Base 64 data for file %(path)s")

Expand Down
5 changes: 4 additions & 1 deletion nova/objects/service.py
Expand Up @@ -31,7 +31,7 @@


# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 65
SERVICE_VERSION = 66


# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
Expand Down Expand Up @@ -228,6 +228,9 @@
# Version 65: Compute RPC v6.1:
# Added stable local node identity
{'compute_rpc': '6.1'},
# Version 66: Compute RPC v6.2:
# Add target_state parameter to rebuild_instance()
{'compute_rpc': '6.2'},
)

# This is the version after which we can rely on having a persistent
Expand Down
14 changes: 7 additions & 7 deletions nova/tests/functional/api_sample_tests/test_evacuate.py
Expand Up @@ -80,7 +80,7 @@ def test_server_evacuate(self, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=False, preserve_ephemeral=mock.ANY,
host='testHost', request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)

@mock.patch('nova.conductor.manager.ComputeTaskManager.rebuild_instance')
def test_server_evacuate_find_host(self, rebuild_mock):
Expand All @@ -97,7 +97,7 @@ def test_server_evacuate_find_host(self, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=False, preserve_ephemeral=mock.ANY,
host=None, request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)


class EvacuateJsonTestV214(EvacuateJsonTest):
Expand All @@ -119,7 +119,7 @@ def test_server_evacuate(self, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=None, preserve_ephemeral=mock.ANY,
host='testHost', request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)

@mock.patch('nova.conductor.manager.ComputeTaskManager.rebuild_instance')
def test_server_evacuate_find_host(self, rebuild_mock):
Expand All @@ -135,7 +135,7 @@ def test_server_evacuate_find_host(self, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=None, preserve_ephemeral=mock.ANY,
host=None, request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)


class EvacuateJsonTestV229(EvacuateJsonTestV214):
Expand Down Expand Up @@ -163,7 +163,7 @@ def test_server_evacuate(self, compute_node_get_all_by_host, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=None, preserve_ephemeral=mock.ANY,
host=None, request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)

@mock.patch('nova.conductor.manager.ComputeTaskManager.rebuild_instance')
@mock.patch('nova.objects.ComputeNodeList.get_all_by_host')
Expand All @@ -184,7 +184,7 @@ def test_server_evacuate_with_force(self, compute_node_get_all_by_host,
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=None, preserve_ephemeral=mock.ANY,
host='testHost', request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)


class EvacuateJsonTestV268(EvacuateJsonTestV229):
Expand All @@ -211,7 +211,7 @@ def test_server_evacuate(self, compute_node_get_all_by_host, rebuild_mock):
orig_sys_metadata=mock.ANY, bdms=mock.ANY, recreate=mock.ANY,
on_shared_storage=None, preserve_ephemeral=mock.ANY,
host=None, request_spec=mock.ANY,
reimage_boot_volume=False)
reimage_boot_volume=False, target_state=None)

def test_server_evacuate_with_force(self):
# doesn't apply to v2.68+, which removed the ability to force migrate
Expand Down

0 comments on commit 8c2e765

Please sign in to comment.