# Permalink 7d2223e, Jul 11, 2018
# 5893 lines (5154 sloc), 269 KB
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""
import collections
import copy
import functools
import re
import string
from castellan import key_manager
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range
from nova import availability_zones
from nova import block_device
from nova.cells import opts as cells_opts
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova.consoleauth import rpcapi as consoleauth_rpcapi
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova import exception
from nova import exception_wrapper
from nova import hooks
from nova.i18n import _
from nova import image
from nova import network
from nova.network import model as network_model
from nova.network.security_group import openstack_driver
from nova.network.security_group import security_group_base
from nova import objects
from nova.objects import base as obj_base
from nova.objects import block_device as block_device_obj
from nova.objects import fields as fields_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler import client as scheduler_client
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Notifier factory pre-bound to the 'compute' service.
get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notifications used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications have the right publisher_id but the
# legacy notifications do not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')
# Global nova configuration object.
CONF = nova.conf.CONF
# NOTE(review): presumably the security groups that must not be modified
# ("RO" read as read-only); the consumers are outside this chunk - confirm.
RO_SECURITY_GROUPS = ['default']
# NOTE(review): judging by the names, action labels for host aggregate
# operations; the code that uses them is not visible here - confirm.
AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'
# NOTE(review): judging by the names, minimum nova-compute service versions
# required for the respective features; confirm against the code that reads
# them (not visible in this chunk).
BFV_RESERVE_MIN_COMPUTE_VERSION = 17
CINDER_V3_ATTACH_MIN_COMPUTE_VERSION = 24
MIN_COMPUTE_MULTIATTACH = 27
MIN_COMPUTE_TRUSTED_CERTS = 31
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []
def check_instance_state(vm_state=None, task_state=(None,),
                         must_have_launched=True):
    """Build a decorator that validates instance state before an API call.

    The decorated function is only invoked when the instance's ``vm_state``
    and ``task_state`` are among the accepted values and, if
    ``must_have_launched`` is set, the instance has a ``launched_at`` value.

    :param vm_state: iterable of accepted vm states, or None to skip the
        vm_state check
    :param task_state: iterable of accepted task states, or None to skip
        the task_state check; by default only ``None`` is accepted
    :param must_have_launched: when true, reject instances whose
        ``launched_at`` is falsy
    :raises: exception.InstanceInvalidState, from the wrapper, naming the
        first attribute that failed its check
    """
    def _as_set(states):
        # Normalise once, at decoration time, for fast membership tests.
        if states is None or isinstance(states, set):
            return states
        return set(states)

    accepted_vm_states = _as_set(vm_state)
    accepted_task_states = _as_set(task_state)

    def outer(f):
        def _invalid(instance, attr):
            # Describe which attribute of the instance failed validation.
            return exception.InstanceInvalidState(
                attr=attr,
                instance_uuid=instance.uuid,
                state=getattr(instance, attr),
                method=f.__name__)

        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if (accepted_vm_states is not None and
                    instance.vm_state not in accepted_vm_states):
                raise _invalid(instance, 'vm_state')
            if (accepted_task_states is not None and
                    instance.task_state not in accepted_task_states):
                raise _invalid(instance, 'task_state')
            if must_have_launched and not instance.launched_at:
                raise _invalid(instance, 'launched_at')
            return f(self, context, instance, *args, **kw)
        return inner
    return outer
def _set_or_none(q):
return q if q is None or isinstance(q, set) else set(q)
def reject_instance_state(vm_state=None, task_state=None):
    """Build a decorator that refuses instances in any of the given states.

    :param vm_state: iterable of vm states to reject, or None to skip the
        vm_state check
    :param task_state: iterable of task states to reject, or None to skip
        the task_state check
    :raises: exception.InstanceInvalidState, from the wrapper, when the
        instance's vm_state or task_state is one of the rejected values
    """
    rejected_vm_states = _set_or_none(vm_state)
    rejected_task_states = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            # Pre-bind the parts of the error common to both checks.
            make_error = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)
            checks = (('vm_state', rejected_vm_states),
                      ('task_state', rejected_task_states))
            for attr, rejected in checks:
                if rejected is None:
                    continue
                current = getattr(instance, attr)
                if current in rejected:
                    raise make_error(attr=attr, state=current)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer
def check_instance_host(function):
    """Decorator: only call ``function`` for instances that have a host.

    :raises: exception.InstanceNotReady, from the wrapper, when
        ``instance.host`` is falsy
    """
    @six.wraps(function)
    def wrapped(self, context, instance, *args, **kwargs):
        if instance.host:
            return function(self, context, instance, *args, **kwargs)
        raise exception.InstanceNotReady(instance_id=instance.uuid)
    return wrapped
def check_instance_lock(function):
    """Decorator: block non-admin callers from acting on locked instances.

    :raises: exception.InstanceIsLocked, from the wrapper, when the
        instance is locked and the request context is not an admin context
    """
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        permitted = not instance.locked or context.is_admin
        if not permitted:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner
def check_instance_cell(fn):
    """Decorator: run ``self._validate_cell(instance)`` before ``fn``.

    Any exception raised by the validation propagates and ``fn`` is not
    called.
    """
    @six.wraps(fn)
    def _cell_checked(self, context, instance, *args, **kwargs):
        self._validate_cell(instance)
        result = fn(self, context, instance, *args, **kwargs)
        return result
    return _cell_checked
def _diff_dict(orig, new):
"""Return a dict describing how to change orig to new. The keys
correspond to values that have changed; the value will be a list
of one or two elements. The first element of the list will be
either '+' or '-', indicating whether the key was updated or
deleted; if the key was updated, the list will contain a second
element, giving the updated value.
"""
# Figure out what keys went away
result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
# Compute the updates
for key, value in new.items():
if key not in orig or value != orig[key]:
result[key] = ['+', value]
return result
def load_cells():
    """Populate the module-level CELLS cache on first use.

    Does nothing when CELLS is already non-empty. Otherwise all cell
    mappings are fetched with an admin context and stored in CELLS; an
    error is logged if none were found.
    """
    global CELLS
    if CELLS:
        return
    CELLS = objects.CellMappingList.get_all(
        nova_context.get_admin_context())
    LOG.debug('Found %(count)i cells: %(cells)s',
              {'count': len(CELLS),
               'cells': ','.join(cell.identity for cell in CELLS)})
    if not CELLS:
        LOG.error('No cells are configured, unable to continue')
@profiler.trace_cls("compute_api")
class API(base.Base):
"""API for interacting with the compute manager."""
def __init__(self, image_api=None, network_api=None, volume_api=None,
             security_group_api=None, **kwargs):
    """Wire up the service clients used by the compute API.

    :param image_api: image API to use; a new image.API() when falsy
    :param network_api: network API to use; a new network.API() when falsy
    :param volume_api: volume API to use; a new cinder.API() when falsy
    :param security_group_api: security group driver to use; obtained
        from openstack_driver when falsy
    :param kwargs: forwarded to the base class constructor
    """
    if not image_api:
        image_api = image.API()
    self.image_api = image_api

    if not network_api:
        network_api = network.API()
    self.network_api = network_api

    if not volume_api:
        volume_api = cinder.API()
    self.volume_api = volume_api

    # NOTE(mriedem): This looks a bit weird but we get the reportclient
    # via SchedulerClient since it lazy-loads SchedulerReportClient on
    # the first usage which helps to avoid a bunch of lockutils spam in
    # the nova-api logs every time the service is restarted (remember
    # that pretty much all of the REST API controllers construct this
    # API class).
    self.placementclient = scheduler_client.SchedulerClient().reportclient

    if not security_group_api:
        security_group_api = (
            openstack_driver.get_openstack_security_group_driver())
    self.security_group_api = security_group_api

    self.consoleauth_rpcapi = consoleauth_rpcapi.ConsoleAuthAPI()
    self.compute_rpcapi = compute_rpcapi.ComputeAPI()
    self.compute_task_api = conductor.ComputeTaskAPI()
    self.servicegroup_api = servicegroup.API()
    self.notifier = rpc.get_notifier('compute', CONF.host)

    # The key manager is only created when ephemeral storage encryption
    # is enabled.
    if CONF.ephemeral_storage_encryption.enabled:
        self.key_manager = key_manager.API()

    # Help us to record host in EventReporter
    self.host = CONF.host
    super(API, self).__init__(**kwargs)
@property
def cell_type(self):
    """Return the cell type, looking it up once and caching it.

    The value is stored on the instance as ``_cell_type`` the first time
    it is requested.
    """
    try:
        cached = self._cell_type
    except AttributeError:
        cached = cells_opts.get_cell_type()
        self._cell_type = cached
    return cached
def _validate_cell(self, instance):
if self.cell_type != 'api':
return
cell_name = instance.cell_name
if not cell_name:
raise exception.InstanceUnknownCell(
instance_uuid=instance.uuid)
def _record_action_start(self, context, instance, action):
    """Record the start of ``action`` for the instance.

    The result of the underlying call is not requested and nothing is
    returned.
    """
    start_action = objects.InstanceAction.action_start
    start_action(context, instance.uuid, action, want_result=False)
def _check_injected_file_quota(self, context, injected_files):
"""Enforce quota limits on injected files.
Raises a QuotaError if any limit is exceeded.
"""
if injected_files is None:
return
# Check number of files first
try:
objects.Quotas.limit_check(context,
injected_files=len(injected_files))
except exception.OverQuota:
raise exception.OnsetFileLimitExceeded()
# OK, now count path and content lengths; we're looking for
# the max...
max_path = 0
max_content = 0
for path, content in injected_files:
max_path = max(max_path, len(path))
max_content = max(max_content, len(content))
try:
objects.Quotas.limit_check(context,
injected_file_path_bytes=max_path,
injected_file_content_bytes=max_content)
except exception.OverQuota as exc:
# Favor path limit over content limit for reporting
# purposes
if 'injected_file_path_bytes' in exc.kwargs['overs']:
raise exception.OnsetFilePathLimitExceeded(
allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
else:
raise exception.OnsetFileContentLimitExceeded(
allowed=exc.kwargs['quotas']['injected_file_content_bytes'])
def _check_metadata_properties_quota(self, context, metadata=None):
    """Enforce quota and size limits on metadata properties.

    :param metadata: dict of metadata; a falsy value is treated as empty
    :raises: exception.InvalidMetadata if metadata is not a dict or a
        key/value fails the string length validation
    :raises: exception.MetadataLimitExceeded if there are too many items
    :raises: exception.InvalidMetadataSize if a key or value is longer
        than 255 characters
    """
    metadata = metadata or {}
    if not isinstance(metadata, dict):
        raise exception.InvalidMetadata(
            reason=(_("Metadata type should be dict.")))

    try:
        objects.Quotas.limit_check(context, metadata_items=len(metadata))
    except exception.OverQuota as over_quota:
        raise exception.MetadataLimitExceeded(
            allowed=over_quota.kwargs['quotas']['metadata_items'])

    # Because metadata is stored in the DB, we hard-code the size limits
    # In future, we may support more variable length strings, so we act
    # as if this is quota-controlled for forwards compatibility.
    # Those are only used in V2 API, from V2.1 API, those checks are
    # validated at API layer schema validation.
    for key, value in metadata.items():
        try:
            utils.check_string_length(value)
            utils.check_string_length(key, min_length=1)
        except exception.InvalidInput as invalid:
            raise exception.InvalidMetadata(
                reason=invalid.format_message())

        if len(key) > 255:
            raise exception.InvalidMetadataSize(
                reason=_("Metadata property key greater than 255 "
                         "characters"))
        if len(value) > 255:
            raise exception.InvalidMetadataSize(
                reason=_("Metadata property value greater than 255 "
                         "characters"))
def _check_requested_secgroups(self, context, secgroups):
"""Check if the security group requested exists and belongs to
the project.
:param context: The nova request context.
:type context: nova.context.RequestContext
:param secgroups: list of requested security group names, or uuids in
the case of Neutron.
:type secgroups: list
:returns: list of requested security group names unmodified if using
nova-network. If using Neutron, the list returned is all uuids.
Note that 'default' is a special case and will be unmodified if
it's requested.
"""
security_groups = []
for secgroup in secgroups:
# NOTE(sdague): default is handled special
if secgroup == "default":
security_groups.append(secgroup)
continue
secgroup_dict = self.security_group_api.get(context, secgroup)
if not secgroup_dict:
raise exception.SecurityGroupNotFoundForProject(
project_id=context.project_id, security_group_id=secgroup)
# Check to see if it's a nova-network or neutron type.
if isinstance(secgroup_dict['id'], int):
# This is nova-network so just return the requested name.
security_groups.append(secgroup)
else:
# The id for neutron is a uuid, so we return the id (uuid).
security_groups.append(secgroup_dict['id'])
return security_groups
def _check_requested_networks(self, context, requested_networks,
max_count):
"""Check if the networks requested belongs to the project
and the fixed IP address for each network provided is within
same the network block
"""
if requested_networks is not None:
if requested_networks.no_allocate:
# If the network request was specifically 'none' meaning don't
# allocate any networks, we just return the number of requested
# instances since quotas don't change at all.
return max_count
# NOTE(danms): Temporary transition
requested_networks = requested_networks.as_tuples()
return self.network_api.validate_networks(context, requested_networks,
max_count)
def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
image):
"""Choose kernel and ramdisk appropriate for the instance.
The kernel and ramdisk can be chosen in one of two ways:
1. Passed in with create-instance request.
2. Inherited from image metadata.
If inherited from image metadata, and if that image metadata value is
set to 'nokernel', both kernel and ramdisk will default to None.
"""
# Inherit from image if not specified
image_properties = image.get('properties', {})
if kernel_id is None:
kernel_id = image_properties.get('kernel_id')
if ramdisk_id is None:
ramdisk_id = image_properties.get('ramdisk_id')
# Force to None if kernel_id indicates that a kernel is not to be used
if kernel_id == 'nokernel':
kernel_id = None
ramdisk_id = None
# Verify kernel and ramdisk exist (fail-fast)
if kernel_id is not None:
kernel_image = self.image_api.get(context, kernel_id)
# kernel_id could have been a URI, not a UUID, so to keep behaviour
# from before, which leaked that implementation detail out to the
# caller, we return the image UUID of the kernel image and ramdisk
# image (below) and not any image URIs that might have been
# supplied.
# TODO(jaypipes): Get rid of this silliness once we move to a real
# Image object and hide all of that stuff within nova.image.api.
kernel_id = kernel_image['id']
if ramdisk_id is not None:
ramdisk_image = self.image_api.get(context, ramdisk_id)
ramdisk_id = ramdisk_image['id']
return kernel_id, ramdisk_id
@staticmethod
def parse_availability_zone(context, availability_zone):
    """Split an ``az[:host[:node]]`` string into its components.

    :param context: request context (not used by this function)
    :param availability_zone: ``az``, ``az:host``, ``az:host:node`` or
        ``az::node``; may be None or empty
    :returns: tuple ``(availability_zone, forced_host, forced_node)``;
        the zone falls back to CONF.default_schedule_zone when empty
    :raises: exception.InvalidInput if there are more than two colons
    """
    # NOTE(vish): We have a legacy hack to allow admins to specify hosts
    #             via az using az:host:node. It might be nice to expose an
    #             api to specify specific hosts to force onto, but for
    #             now it just supports this legacy hack.
    # NOTE(deva): It is also possible to specify az::node, in which case
    #             the host manager will determine the correct host.
    zone = availability_zone
    forced_host = None
    forced_node = None

    if zone and ':' in zone:
        parts = zone.split(':')
        if len(parts) == 2:
            zone, forced_host = parts
        elif len(parts) == 3:
            if parts[1] == '':
                # az::node - the host is left for the scheduler to pick.
                zone, forced_node = parts[0], parts[2]
            else:
                zone, forced_host, forced_node = parts
        else:
            raise exception.InvalidInput(
                reason="Unable to parse availability_zone")

    if not zone:
        zone = CONF.default_schedule_zone

    return zone, forced_host, forced_node
def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                      auto_disk_config, image):
    """Reject auto disk config when the image has disabled it.

    :param auto_disk_config_img: the image's auto disk config setting
    :param auto_disk_config: the requested auto disk config value
    :param image: value reported in the error as the image
    :raises: exception.AutoDiskConfigDisabledByImage if the image setting
        disables auto disk config but the request asks for it
    """
    disabled_by_image = utils.is_auto_disk_config_disabled(
        auto_disk_config_img)
    conflict = disabled_by_image and auto_disk_config
    if conflict:
        raise exception.AutoDiskConfigDisabledByImage(image=image)
def _inherit_properties_from_image(self, image, auto_disk_config):
    """Collect the instance options that are taken from image properties.

    :param image: image metadata dict
    :param auto_disk_config: requested value; when None the image's own
        setting (converted to a bool) is used
    :returns: dict with 'os_type', 'architecture', 'vm_mode' and
        'auto_disk_config' keys
    :raises: exception.AutoDiskConfigDisabledByImage, see
        _ensure_auto_disk_config_is_valid
    """
    props = image.get('properties', {})
    image_setting = utils.get_auto_disk_config_from_image_props(props)
    self._ensure_auto_disk_config_is_valid(image_setting,
                                           auto_disk_config,
                                           image.get("id"))
    if auto_disk_config is None:
        auto_disk_config = strutils.bool_from_string(image_setting)

    inherited = {name: props.get(name)
                 for name in ('os_type', 'architecture', 'vm_mode')}
    inherited['auto_disk_config'] = auto_disk_config
    return inherited
def _check_config_drive(self, config_drive):
if config_drive:
try:
bool_val = strutils.bool_from_string(config_drive,
strict=True)
except ValueError:
raise exception.ConfigDriveInvalidValue(option=config_drive)
else:
bool_val = False
# FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
# but this is because the config drive column is a String. False
# is represented by using an empty string. And for whatever
# reason, we rely on the DB to cast True to a String.
return True if bool_val else ''
def _check_requested_image(self, context, image_id, image,
instance_type, root_bdm):
if not image:
return
if image['status'] != 'active':
raise exception.ImageNotActive(image_id=image_id)
image_properties = image.get('properties', {})
config_drive_option = image_properties.get(
'img_config_drive', 'optional')
if config_drive_option not in ['optional', 'mandatory']:
raise exception.InvalidImageConfigDrive(
config_drive=config_drive_option)
if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
raise exception.FlavorMemoryTooSmall()
# Image min_disk is in gb, size is in bytes. For sanity, have them both
# in bytes.
image_min_disk = int(image.get('min_disk') or 0) * units.Gi
image_size = int(image.get('size') or 0)
# Target disk is a volume. Don't check flavor disk size because it
# doesn't make sense, and check min_disk against the volume size.
if (root_bdm is not None and root_bdm.is_volume):
# There are 2 possibilities here: either the target volume already
# exists, or it doesn't, in which case the bdm will contain the
# intended volume size.
#
# Cinder does its own check against min_disk, so if the target
# volume already exists this has already been done and we don't
# need to check it again here. In this case, volume_size may not be
# set on the bdm.
#
# If we're going to create the volume, the bdm will contain
# volume_size. Therefore we should check it if it exists. This will
# still be checked again by cinder when the volume is created, but
# that will not happen until the request reaches a host. By
# checking it here, the user gets an immediate and useful failure
# indication.
#
# The third possibility is that we have failed to consider
# something, and there are actually more than 2 possibilities. In
# this case cinder will still do the check at volume creation time.
# The behaviour will still be correct, but the user will not get an
# immediate failure from the api, and will instead have to
# determine why the instance is in an error state with a task of
# block_device_mapping.
#
# We could reasonably refactor this check into _validate_bdm at
# some future date, as the various size logic is already split out
# in there.
dest_size = root_bdm.volume_size
if dest_size is not None:
dest_size *= units.Gi
if image_min_disk > dest_size:
raise exception.VolumeSmallerThanMinDisk(
volume_size=dest_size, image_min_disk=image_min_disk)
# Target disk is a local disk whose size is taken from the flavor
else:
dest_size = instance_type['root_gb'] * units.Gi
# NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
# since libvirt interpreted the value differently than other
# drivers. A value of 0 means don't check size.
if dest_size != 0:
if image_size > dest_size:
raise exception.FlavorDiskSmallerThanImage(
flavor_size=dest_size, image_size=image_size)
if image_min_disk > dest_size:
raise exception.FlavorDiskSmallerThanMinDisk(
flavor_size=dest_size, image_min_disk=image_min_disk)
else:
# The user is attempting to create a server with a 0-disk
# image-backed flavor, which can lead to issues with a large
# image consuming an unexpectedly large amount of local disk
# on the compute host. Check to see if the deployment will
# allow that.
if not context.can(
servers_policies.ZERO_DISK_FLAVOR, fatal=False):
raise exception.BootFromVolumeRequiredForZeroDiskFlavor()
def _get_image_defined_bdms(self, instance_type, image_meta,
                            root_device_name):
    """Return the block device mappings defined by the image.

    :param instance_type: flavor, passed on when preparing the image's
        'mappings' property
    :param image_meta: image metadata dict
    :param root_device_name: root device name used when converting a
        legacy mapping
    :returns: list of block device mappings taken from the image's
        'block_device_mapping' property, merged with its 'mappings'
        property when that is non-empty
    """
    props = image_meta.get('properties', {})

    # Get the block device mappings defined by the image.
    raw_bdms = props.get('block_device_mapping', [])
    uses_v2_format = props.get('bdm_v2', False)
    raw_mappings = props.get('mappings', [])

    if uses_v2_format:
        bdms = [block_device.BlockDeviceDict(raw) for raw in raw_bdms]
    else:
        bdms = block_device.from_legacy_mapping(
            raw_bdms, None, root_device_name)

    if not raw_mappings:
        return bdms

    prepared = self._prepare_image_mapping(instance_type, raw_mappings)
    return self._merge_bdms_lists(prepared, bdms)
def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
    """Return blank bdms for flavor ephemeral/swap space not yet mapped.

    :param instance_type: flavor; 'ephemeral_gb' and 'swap' are read
    :param block_device_mapping: the block device mappings gathered so far
    :returns: list with a blank ephemeral bdm and/or a blank swap bdm for
        each kind the flavor defines and the mappings do not already
        contain
    """
    def _already_mapped(matches):
        return any(bdm for bdm in block_device_mapping if matches(bdm))

    ephemeral_mapped = _already_mapped(block_device.new_format_is_ephemeral)
    swap_mapped = _already_mapped(block_device.new_format_is_swap)

    blank_bdms = []
    if instance_type.get('ephemeral_gb') and not ephemeral_mapped:
        blank_bdms.append(
            block_device.create_blank_bdm(instance_type['ephemeral_gb']))
    if instance_type.get('swap') and not swap_mapped:
        blank_bdms.append(
            block_device.create_blank_bdm(instance_type['swap'], 'swap'))
    return blank_bdms
def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
"""Override any block devices from the first list by device name
:param overridable_mappings: list which items are overridden
:param overrider_mappings: list which items override
:returns: A merged list of bdms
"""
device_names = set(bdm['device_name'] for bdm in overrider_mappings
if bdm['device_name'])
return (overrider_mappings +
[bdm for bdm in overridable_mappings
if bdm['device_name'] not in device_names])
def _check_and_transform_bdm(self, context, base_options, instance_type,
                             image_meta, min_count, max_count,
                             block_device_mapping, legacy_bdm):
    """Validate the requested bdms and merge them with the image and
    flavor defined ones.

    :param base_options: dict; 'root_device_name' and 'image_ref' are read
    :param block_device_mapping: the requested block device mappings
    :param legacy_bdm: true when the requested mappings use the legacy
        format and must be converted
    :returns: the result of block_device_make_list_from_dicts() for the
        merged mappings
    :raises: exception.InvalidRequest if a volume is given the root
        device name of an image-booted server, or if a volume-sourced
        bdm is requested for more than one instance
    """
    # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
    #                  It's needed for legacy conversion to work.
    root_device_name = base_options.get('root_device_name') or 'vda'
    image_ref = base_options.get('image_ref', '')

    # If the instance is booted by image and has a volume attached,
    # the volume cannot have the same device name as root_device_name
    if image_ref:
        for requested in block_device_mapping:
            if requested.get('destination_type') != 'volume':
                continue
            device = block_device.strip_dev(requested.get('device_name'))
            if device == root_device_name:
                msg = _('The volume cannot be assigned the same device'
                        ' name as the root device %s') % root_device_name
                raise exception.InvalidRequest(msg)

    image_defined_bdms = self._get_image_defined_bdms(
        instance_type, image_meta, root_device_name)
    root_in_image_bdms = (
        block_device.get_root_bdm(image_defined_bdms) is not None)

    if legacy_bdm:
        block_device_mapping = block_device.from_legacy_mapping(
            block_device_mapping, image_ref, root_device_name,
            no_root=root_in_image_bdms)
    elif root_in_image_bdms:
        # NOTE (ndipanov): client will insert an image mapping into the v2
        # block_device_mapping, but if there is a bootable device in image
        # mappings - we need to get rid of the inserted image
        # NOTE (gibi): another case is when a server is booted with an
        # image to bdm mapping where the image only contains a bdm to a
        # snapshot. In this case the other image to bdm mapping
        # contains an unnecessary device with boot_index == 0.
        # Also in this case the image_ref is None as we are booting from
        # an image to volume bdm.
        block_device_mapping = [
            bdm for bdm in block_device_mapping
            if not (bdm.get('boot_index') == 0 and
                    bdm.get('source_type') == 'image')]

    merged = self._merge_bdms_lists(
        image_defined_bdms, block_device_mapping)

    if min_count > 1 or max_count > 1:
        if any(bdm['source_type'] == 'volume' for bdm in merged):
            msg = _('Cannot attach one or more volumes to multiple'
                    ' instances')
            raise exception.InvalidRequest(msg)

    merged.extend(self._get_flavor_defined_bdms(instance_type, merged))

    return block_device_obj.block_device_make_list_from_dicts(
        context, merged)
def _get_image(self, context, image_href):
if not image_href:
return None, {}
image = self.image_api.get(context, image_href)
return image['id'], image
def _checks_for_create_and_rebuild(self, context, image_id, image,
                                   instance_type, metadata,
                                   files_to_inject, root_bdm):
    """Run the validations shared by the create and rebuild paths.

    Checks, in this order, the metadata quota, the injected file quota
    and the requested image against the flavor/root bdm. The first
    failing check raises and the remaining ones are not run.

    :param context: the request context
    :param image_id: id of the requested image
    :param image: image metadata dict
    :param instance_type: the flavor
    :param metadata: instance metadata to validate
    :param files_to_inject: injected files to validate
    :param root_bdm: root block device mapping, passed to the image check
    """
    self._check_metadata_properties_quota(context, metadata)
    self._check_injected_file_quota(context, files_to_inject)
    self._check_requested_image(context, image_id, image,
                                instance_type, root_bdm)
def _validate_and_build_base_options(self, context, instance_type,
                                     boot_meta, image_href, image_id,
                                     kernel_id, ramdisk_id, display_name,
                                     display_description, key_name,
                                     key_data, security_groups,
                                     availability_zone, user_data,
                                     metadata, access_ip_v4, access_ip_v6,
                                     requested_networks, config_drive,
                                     auto_disk_config, reservation_id,
                                     max_count):
    """Verify all the input parameters regardless of the provisioning
    strategy being performed.

    Note that ``image_id`` is accepted but not read by this method.

    :returns: a 4-tuple ``(base_options, max_network_count, key_pair,
        security_groups)`` where ``base_options`` is the dict of values
        for the new instance(s), ``max_network_count`` is the result of
        the network validation, ``key_pair`` is the KeyPair object that
        was looked up by name (or None) and ``security_groups`` is the
        result of _check_requested_secgroups
    :raises: exception.FlavorNotFound if the flavor is disabled
    :raises: exception.InstanceUserDataMalformed if user_data cannot be
        base64-decoded
    :raises: exception.InvalidRequest if the image metadata is invalid
    """
    # A disabled flavor is reported as if it did not exist.
    if instance_type['disabled']:
        raise exception.FlavorNotFound(flavor_id=instance_type['id'])
    if user_data:
        # The decoded value is discarded; this only validates the encoding.
        try:
            base64utils.decode_as_bytes(user_data)
        except TypeError:
            raise exception.InstanceUserDataMalformed()
    # When using Neutron, _check_requested_secgroups will translate and
    # return any requested security group names to uuids.
    security_groups = (
        self._check_requested_secgroups(context, security_groups))
    # Note: max_count is the number of instances requested by the user,
    # max_network_count is the maximum number of instances taking into
    # account any network quotas
    max_network_count = self._check_requested_networks(context,
                                 requested_networks, max_count)
    kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)
    config_drive = self._check_config_drive(config_drive)
    # Only look the key pair up when a name was given without key data.
    if key_data is None and key_name is not None:
        key_pair = objects.KeyPair.get_by_name(context,
                                               context.user_id,
                                               key_name)
        key_data = key_pair.public_key
    else:
        key_pair = None
    root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))
    try:
        image_meta = objects.ImageMeta.from_dict(boot_meta)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)
    system_metadata = {}
    # PCI requests come from two sources: instance flavor and
    # requested_networks. The first call in below returns an
    # InstancePCIRequests object which is a list of InstancePCIRequest
    # objects. The second call in below creates an InstancePCIRequest
    # object for each SR-IOV port, and append it to the list in the
    # InstancePCIRequests object
    pci_request_info = pci_request.get_pci_requests_from_flavor(
        instance_type)
    self.network_api.create_resource_requests(
        context, requested_networks, pci_request_info)
    base_options = {
        'reservation_id': reservation_id,
        'image_ref': image_href,
        'kernel_id': kernel_id or '',
        'ramdisk_id': ramdisk_id or '',
        'power_state': power_state.NOSTATE,
        'vm_state': vm_states.BUILDING,
        'config_drive': config_drive,
        'user_id': context.user_id,
        'project_id': context.project_id,
        'instance_type_id': instance_type['id'],
        'memory_mb': instance_type['memory_mb'],
        'vcpus': instance_type['vcpus'],
        'root_gb': instance_type['root_gb'],
        'ephemeral_gb': instance_type['ephemeral_gb'],
        'display_name': display_name,
        'display_description': display_description,
        'user_data': user_data,
        'key_name': key_name,
        'key_data': key_data,
        'locked': False,
        'metadata': metadata or {},
        'access_ip_v4': access_ip_v4,
        'access_ip_v6': access_ip_v6,
        'availability_zone': availability_zone,
        'root_device_name': root_device_name,
        'progress': 0,
        'pci_requests': pci_request_info,
        'numa_topology': numa_topology,
        'system_metadata': system_metadata}
    # Values inherited from the image are applied last, so they replace
    # any same-named key set above.
    options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)
    base_options.update(options_from_image)
    # return the validated options and maximum number of instances allowed
    # by the network quotas
    return base_options, max_network_count, key_pair, security_groups
def _provision_instances(self, context, instance_type, min_count,
        max_count, base_options, boot_meta, security_groups,
        block_device_mapping, shutdown_terminate,
        instance_group, check_server_group_quota, filter_properties,
        key_pair, tags, trusted_certs, supports_multiattach=False):
    """Create the request spec, build request and instance mapping for
    each instance to build.

    The number of instances comes from the instance quota check. For
    every instance a RequestSpec, a BuildRequest and an InstanceMapping
    (with no cell mapping yet) are created; the Instance object itself
    is populated but created with ``create_instance=False``. If anything
    raises, the artifacts gathered so far are passed to
    _cleanup_build_artifacts and the exception is re-raised.

    :param base_options: dict of instance values; 'numa_topology',
        'pci_requests' and 'availability_zone' are also passed to the
        RequestSpec
    :param instance_group: server group to add each instance to, or a
        falsy value for none
    :param check_server_group_quota: when true, the
        'server_group_members' quota is checked before adding a member
        and, if CONF.quota.recheck_quota is set, rechecked afterwards
    :param key_pair: key pair appended to each instance's keypairs when
        truthy
    :param tags: passed to _transform_tags for each instance
    :param trusted_certs: passed to _retrieve_trusted_certs_object
    :param supports_multiattach: passed to
        _bdm_validate_set_size_and_instance
    :returns: list of ``(req_spec, build_request, inst_mapping)`` tuples,
        one per instance
    :raises: exception.QuotaError if the server group member quota is
        exceeded
    """
    # Check quotas
    num_instances = compute_utils.check_num_instances_quota(
            context, instance_type, min_count, max_count)
    security_groups = self.security_group_api.populate_security_groups(
            security_groups)
    self.security_group_api.ensure_default(context)
    LOG.debug("Going to run %s instances...", num_instances)
    instances_to_build = []
    try:
        for i in range(num_instances):
            # Create a uuid for the instance so we can store the
            # RequestSpec before the instance is created.
            instance_uuid = uuidutils.generate_uuid()
            # Store the RequestSpec that will be used for scheduling.
            req_spec = objects.RequestSpec.from_components(context,
                    instance_uuid, boot_meta, instance_type,
                    base_options['numa_topology'],
                    base_options['pci_requests'], filter_properties,
                    instance_group, base_options['availability_zone'],
                    security_groups=security_groups)
            # NOTE(danms): We need to record num_instances on the request
            # spec as this is how the conductor knows how many were in this
            # batch.
            req_spec.num_instances = num_instances
            req_spec.create()
            # Create an instance object, but do not store in db yet.
            instance = objects.Instance(context=context)
            instance.uuid = instance_uuid
            instance.update(base_options)
            instance.keypairs = objects.KeyPairList(objects=[])
            if key_pair:
                instance.keypairs.objects.append(key_pair)
            instance.trusted_certs = self._retrieve_trusted_certs_object(
                context, trusted_certs)
            instance = self.create_db_entry_for_new_instance(context,
                    instance_type, boot_meta, instance, security_groups,
                    block_device_mapping, num_instances, i,
                    shutdown_terminate, create_instance=False)
            # The validated mapping replaces the loop-carried value, so
            # the next iteration receives this iteration's result.
            block_device_mapping = (
                self._bdm_validate_set_size_and_instance(context,
                    instance, instance_type, block_device_mapping,
                    supports_multiattach))
            instance_tags = self._transform_tags(tags, instance.uuid)
            build_request = objects.BuildRequest(context,
                    instance=instance, instance_uuid=instance.uuid,
                    project_id=instance.project_id,
                    block_device_mappings=block_device_mapping,
                    tags=instance_tags)
            build_request.create()
            # Create an instance_mapping.  The null cell_mapping indicates
            # that the instance doesn't yet exist in a cell, and lookups
            # for it need to instead look for the RequestSpec.
            # cell_mapping will be populated after scheduling, with a
            # scheduling failure using the cell_mapping for the special
            # cell0.
            inst_mapping = objects.InstanceMapping(context=context)
            inst_mapping.instance_uuid = instance_uuid
            inst_mapping.project_id = context.project_id
            inst_mapping.cell_mapping = None
            inst_mapping.create()
            # Recorded before the server group handling below so that a
            # failure there still cleans up this instance's artifacts.
            instances_to_build.append(
                (req_spec, build_request, inst_mapping))
            if instance_group:
                if check_server_group_quota:
                    try:
                        objects.Quotas.check_deltas(
                            context, {'server_group_members': 1},
                            instance_group, context.user_id)
                    except exception.OverQuota:
                        msg = _("Quota exceeded, too many servers in "
                                "group")
                        raise exception.QuotaError(msg)
                members = objects.InstanceGroup.add_members(
                    context, instance_group.uuid, [instance.uuid])
                # NOTE(melwitt): We recheck the quota after creating the
                # object to prevent users from allocating more resources
                # than their allowed quota in the event of a race. This is
                # configurable because it can be expensive if strict quota
                # limits are not required in a deployment.
                if CONF.quota.recheck_quota and check_server_group_quota:
                    try:
                        objects.Quotas.check_deltas(
                            context, {'server_group_members': 0},
                            instance_group, context.user_id)
                    except exception.OverQuota:
                        # Undo the membership that was just added.
                        objects.InstanceGroup._remove_members_in_db(
                            context, instance_group.id, [instance.uuid])
                        msg = _("Quota exceeded, too many servers in "
                                "group")
                        raise exception.QuotaError(msg)
                # list of members added to servers group in this iteration
                # is needed to check quota of server group during add next
                # instance
                instance_group.members.extend(members)
    # In the case of any exceptions, attempt DB cleanup
    except Exception:
        with excutils.save_and_reraise_exception():
            self._cleanup_build_artifacts(None, instances_to_build)
    return instances_to_build
@staticmethod
def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
    """Build the TrustedCerts object to use for image cert validation.

    User-supplied certificate IDs take precedence. When none are given
    the configured default IDs are used, but only if glance signature
    verification and certificate validation are both enabled.

    :param context: The user request auth context
    :param trusted_certs: list of user-specified trusted cert string IDs,
        may be None
    :param rebuild: True if rebuilding the server, False if creating a
        new server
    :returns: nova.objects.TrustedCerts object, or None if no
        user-specified trusted cert IDs were given and nova is not
        configured with default trusted cert IDs
    :raises: nova.exception.CertificateValidationNotYetAvailable: If the
        minimum nova-compute service version (looked up without cell
        targeting for a rebuild, across all cells for a create) is
        older than MIN_COMPUTE_TRUSTED_CERTS
    """
    # Work out which certificate IDs apply, if any.
    if trusted_certs:
        cert_ids = trusted_certs
    else:
        defaults_enabled = (
            CONF.glance.verify_glance_signatures and
            CONF.glance.enable_certificate_validation and
            CONF.glance.default_trusted_certificate_ids)
        if not defaults_enabled:
            return None
        cert_ids = CONF.glance.default_trusted_certificate_ids
    certs_to_return = objects.TrustedCerts(ids=cert_ids)

    # Make sure the computes are new enough to honour trusted certs.
    # TODO(mriedem): This minimum version compat code can be dropped in
    # the 19.0.0 Stein release when all computes must be at a minimum
    # running Rocky code.
    if rebuild:
        # A rebuild stays where it is, so only the current cell matters.
        oldest_compute = objects.Service.get_minimum_version(
            context, 'nova-compute')
    else:
        # The target cell is not known yet, so every cell is checked.
        # NOTE(mriedem): For multi-create server requests, we're hitting
        # this for each instance since it's not cached; we could likely
        # optimize this.
        oldest_compute = objects.service.get_minimum_version_all_cells(
            context, ['nova-compute'])

    if oldest_compute < MIN_COMPUTE_TRUSTED_CERTS:
        raise exception.CertificateValidationNotYetAvailable()
    return certs_to_return
def _get_bdm_image_metadata(self, context, block_device_mapping,
legacy_bdm=True):
"""If we are booting from a volume, we need to get the
volume details from Cinder and make sure we pass the
metadata back accordingly.
"""
if not block_device_mapping:
return {}
for bdm in block_device_mapping:
if (legacy_bdm and
block_device.get_device_letter(
bdm.get('device_name', '')) != 'a'):
continue
elif not legacy_bdm and bdm.get('boot_index') != 0:
continue
volume_id = bdm.get('volume_id')
snapshot_id = bdm.get('snapshot_id')
if snapshot_id:
# NOTE(alaski): A volume snapshot inherits metadata from the
# originating volume, but the API does not expose metadata
# on the snapshot itself. So we query the volume for it below.
snapshot = self.volume_api.get_snapshot(context, snapshot_id)
volume_id = snapshot['volume_id']
if bdm.get('image_id'):
try:
image_id = bdm['image_id']
image_meta = self.image_api.get(context, image_id)
return image_meta
except Exception:
raise exception.InvalidBDMImage(id=image_id)
elif volume_id:
try:
volume = self.volume_api.get(context, volume_id)
except exception.CinderConnectionFailed:
raise
except Exception:
raise exception.InvalidBDMVolume(id=volume_id)
if not volume.get('bootable', True):
raise exception.InvalidBDMVolumeNotBootable(id=volume_id)
return utils.get_image_metadata_from_volume(volume)
return {}
@staticmethod
def _get_requested_instance_group(context, filter_properties):
if (not filter_properties or
not filter_properties.get('scheduler_hints')):
return
group_hint = filter_properties.get('scheduler_hints').get('group')
if not group_hint:
return
return objects.InstanceGroup.get_by_uuid(context, group_hint)
def _create_instance(self, context, instance_type,
           image_href, kernel_id, ramdisk_id,
           min_count, max_count,
           display_name, display_description,
           key_name, key_data, security_groups,
           availability_zone, user_data, metadata, injected_files,
           admin_password, access_ip_v4, access_ip_v6,
           requested_networks, config_drive,
           block_device_mapping, auto_disk_config, filter_properties,
           reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
           check_server_group_quota=False, tags=None,
           supports_multiattach=False, trusted_certs=None):
    """Validate a create request and hand the instance(s) off to be built.

    The input parameters are verified regardless of the provisioning
    strategy being performed, the build artifacts are provisioned and
    the request is passed on to the compute task API.

    :returns: tuple of (instances, reservation_id)
    :raises: CertificateValidationFailed if trusted certs are requested
        (or configured by default) for a boot-from-volume request
    :raises: PortLimitExceeded if network quota does not allow even
        min_count instances
    """
    # Fill in defaults for whatever the caller left unset.
    if reservation_id is None:
        reservation_id = utils.generate_uid('r')
    security_groups = security_groups or ['default']
    min_count = min_count or 1
    max_count = max_count or min_count
    block_device_mapping = block_device_mapping or []
    tags = tags or []

    if image_href:
        image_id, boot_meta = self._get_image(context, image_href)
    else:
        # Boot from volume. This mirrors the certificate selection logic
        # in _retrieve_trusted_certs_object.
        certs_in_effect = (
            trusted_certs or
            (CONF.glance.verify_glance_signatures and
             CONF.glance.enable_certificate_validation and
             CONF.glance.default_trusted_certificate_ids))
        if certs_in_effect:
            msg = _("Image certificate validation is not supported "
                    "when booting from volume")
            raise exception.CertificateValidationFailed(message=msg)
        image_id = None
        boot_meta = self._get_bdm_image_metadata(
            context, block_device_mapping, legacy_bdm)

    self._check_auto_disk_config(
        image=boot_meta, auto_disk_config=auto_disk_config)

    (base_options, max_net_count,
     key_pair, security_groups) = self._validate_and_build_base_options(
        context, instance_type, boot_meta, image_href, image_id,
        kernel_id, ramdisk_id, display_name, display_description,
        key_name, key_data, security_groups, availability_zone,
        user_data, metadata, access_ip_v4, access_ip_v6,
        requested_networks, config_drive, auto_disk_config,
        reservation_id, max_count)

    # max_net_count is the maximum number of instances requested by the
    # user adjusted for any network quota constraints, including
    # consideration of connections to each requested network
    if max_net_count < min_count:
        raise exception.PortLimitExceeded()
    if max_net_count < max_count:
        LOG.info("max count reduced from %(max_count)d to "
                 "%(max_net_count)d due to network port quota",
                 {'max_count': max_count,
                  'max_net_count': max_net_count})
        max_count = max_net_count

    block_device_mapping = self._check_and_transform_bdm(
        context, base_options, instance_type, boot_meta, min_count,
        max_count, block_device_mapping, legacy_bdm)

    # This check has to wait until the bdms from all sources have been
    # merged, because it needs the root bdm.
    self._checks_for_create_and_rebuild(
        context, image_id, boot_meta, instance_type, metadata,
        injected_files, block_device_mapping.root_bdm())

    instance_group = self._get_requested_instance_group(
        context, filter_properties)
    tags = self._create_tag_list_obj(context, tags)

    instances_to_build = self._provision_instances(
        context, instance_type, min_count, max_count, base_options,
        boot_meta, security_groups, block_device_mapping,
        shutdown_terminate, instance_group, check_server_group_quota,
        filter_properties, key_pair, tags, trusted_certs,
        supports_multiattach)

    # Split the (RequestSpec, BuildRequest, InstanceMapping) tuples into
    # the parallel lists the task API expects.
    instances = []
    request_specs = []
    build_requests = []
    for request_spec, build_request, _inst_mapping in instances_to_build:
        build_requests.append(build_request)
        instances.append(build_request.get_new_instance(context))
        request_specs.append(request_spec)

    if not CONF.cells.enable:
        self.compute_task_api.schedule_and_build_instances(
            context,
            build_requests=build_requests,
            request_spec=request_specs,
            image=boot_meta,
            admin_password=admin_password,
            injected_files=injected_files,
            requested_networks=requested_networks,
            block_device_mapping=block_device_mapping,
            tags=tags)
        return instances, reservation_id

    # NOTE(danms): CellsV1 can't do the new thing, so we
    # do the old thing here. We can remove this path once
    # we stop supporting v1.
    for instance in instances:
        instance.create()

    # NOTE(melwitt): We recheck the quota after creating the objects
    # to prevent users from allocating more resources than their
    # allowed quota in the event of a race. This is configurable
    # because it can be expensive if strict quota limits are not
    # required in a deployment.
    if CONF.quota.recheck_quota:
        try:
            compute_utils.check_num_instances_quota(
                context, instance_type, 0, 0,
                orig_num_req=len(instances))
        except exception.TooManyInstances:
            with excutils.save_and_reraise_exception():
                # Remove the instances just created together with
                # their build requests, request specs and instance
                # mappings before the error propagates.
                self._cleanup_build_artifacts(instances,
                                              instances_to_build)

    self.compute_task_api.build_instances(
        context,
        instances=instances, image=boot_meta,
        filter_properties=filter_properties,
        admin_password=admin_password,
        injected_files=injected_files,
        requested_networks=requested_networks,
        security_groups=security_groups,
        block_device_mapping=block_device_mapping,
        legacy_bdm=False)
    return instances, reservation_id
@staticmethod
def _cleanup_build_artifacts(instances, instances_to_build):
# instances_to_build is a list of tuples:
# (RequestSpec, BuildRequest, InstanceMapping)
# Be paranoid about artifacts being deleted underneath us.
for instance in instances or []:
try:
instance.destroy()
except exception.InstanceNotFound:
pass
for rs, build_request, im in instances_to_build or []:
try:
rs.destroy()
except exception.RequestSpecNotFound:
pass
try:
build_request.destroy()
except exception.BuildRequestNotFound:
pass
try:
im.destroy()
except exception.InstanceMappingNotFound:
pass
@staticmethod
def _volume_size(instance_type, bdm):
size = bdm.get('volume_size')
# NOTE (ndipanov): inherit flavor size only for swap and ephemeral
if (size is None and bdm.get('source_type') == 'blank' and
bdm.get('destination_type') == 'local'):
if bdm.get('guest_format') == 'swap':
size = instance_type.get('swap', 0)
else:
size = instance_type.get('ephemeral_gb', 0)
return size
def _prepare_image_mapping(self, instance_type, mappings):
    """Extract and format blank devices from image mappings.

    Only swap and ephemeral entries are kept; 'ami' and 'root' entries
    are skipped, as are devices whose resulting size is 0.

    :param instance_type: flavor used to size the devices
    :param mappings: image mappings with 'virtual' and 'device' keys
    :returns: list of BlockDeviceDict for the blank local devices
    """
    prepared_mappings = []
    for image_bdm in block_device.mappings_prepend_dev(mappings):
        LOG.debug("Image bdm %s", image_bdm)

        virtual_name = image_bdm['virtual']
        if (virtual_name in ('ami', 'root') or
                not block_device.is_swap_or_ephemeral(virtual_name)):
            continue

        # Swap is always formatted as swap; ephemeral devices fall back
        # to the configured default format.
        if virtual_name == 'swap':
            guest_format = 'swap'
        else:
            guest_format = (image_bdm.get('guest_format') or
                            CONF.default_ephemeral_format)

        values = block_device.BlockDeviceDict({
            'device_name': image_bdm['device'],
            'source_type': 'blank',
            'destination_type': 'local',
            'device_type': 'disk',
            'guest_format': guest_format,
            'delete_on_termination': True,
            'boot_index': -1})
        values['volume_size'] = self._volume_size(instance_type, values)

        if values['volume_size'] != 0:
            prepared_mappings.append(values)

    return prepared_mappings
def _bdm_validate_set_size_and_instance(self, context, instance,
                                        instance_type,
                                        block_device_mapping,
                                        supports_multiattach=False):
    """Validate the bdms, then size them and tie them to the instance.

    The bdm list is cloned before it is modified because this method
    can be called several times when a single request boots more than
    one instance.

    :returns: a clone of block_device_mapping whose entries have
        volume_size and instance_uuid set
    """
    instance_uuid = instance.uuid
    LOG.debug("block_device_mapping %s", list(block_device_mapping),
              instance_uuid=instance_uuid)
    self._validate_bdm(
        context, instance, instance_type, block_device_mapping,
        supports_multiattach)

    per_instance_bdms = block_device_mapping.obj_clone()
    for cloned_bdm in per_instance_bdms:
        cloned_bdm.volume_size = self._volume_size(
            instance_type, cloned_bdm)
        cloned_bdm.instance_uuid = instance_uuid
    return per_instance_bdms
def _create_block_device_mapping(self, block_device_mapping):
# Copy the block_device_mapping because this method can be called
# multiple times when more than one instance is booted in a single
# request. This avoids 'id' being set and triggering the object dupe
# detection
db_block_device_mapping = copy.deepcopy(block_device_mapping)
# Create the BlockDeviceMapping objects in the db.
for bdm in db_block_device_mapping:
# TODO(alaski): Why is this done?
if bdm.volume_size == 0:
continue
bdm.update_or_create()
def _validate_bdm(self, context, instance, instance_type,
                  block_device_mappings, supports_multiattach=False):
    """Validate the block device mappings requested for an instance.

    The boot indexes must form the sequence 0, 1, 2, ... with at least
    one boot device. Each mapping is then checked against its backing
    image, volume or snapshot, and finally the ephemeral, swap and local
    device totals are checked against the flavor and configuration.

    The mappings are updated in place: a volume-backed mapping gets its
    volume_size from the volume (and attachment_id set to None if it is
    not already in the mapping), and a snapshot-backed mapping without a
    size takes the size of the snapshot.

    :param context: The request context
    :param instance: The instance the mappings are for; its 'image_ref'
        is compared against each mapping's image_id
    :param instance_type: The flavor; 'ephemeral_gb' and 'swap' are read
    :param block_device_mappings: iterable of BlockDeviceMapping objects
    :param supports_multiattach: passed through to
        _check_attach_and_reserve_volume
    :raises: InvalidBDMBootSequence if the boot indexes are invalid
    :raises: InvalidBDMImage if a mapped image cannot be retrieved
    :raises: InvalidBDM if an image-to-volume or blank-to-volume mapping
        has no size
    :raises: InvalidBDMVolume, InvalidVolume, CinderConnectionFailed,
        MultiattachNotSupportedOldMicroversion,
        MultiattachSupportNotYetAvailable on volume check failures
    :raises: InvalidBDMSnapshot if a mapped snapshot cannot be retrieved
    :raises: InvalidBDMEphemeralSize, InvalidBDMFormat,
        InvalidBDMSwapSize, InvalidBDMLocalsLimit if the flavor or
        configuration limits are exceeded
    """
    # Make sure that the boot indexes make sense.
    # Setting a negative value or None indicates that the device should not
    # be used for booting.
    boot_indexes = sorted([bdm.boot_index
                           for bdm in block_device_mappings
                           if bdm.boot_index is not None
                           and bdm.boot_index >= 0])

    # Each device which is capable of being used as boot device should
    # be given a unique boot index, starting from 0 in ascending order, and
    # there needs to be at least one boot device.
    if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
        # Convert the BlockDeviceMappingList to a list for repr details.
        LOG.debug('Invalid block device mapping boot sequence for '
                  'instance: %s', list(block_device_mappings),
                  instance=instance)
        raise exception.InvalidBDMBootSequence()

    # The minimum compute version does not depend on the mapping being
    # checked, so it is looked up lazily and at most once per call
    # rather than once per volume-backed mapping.
    min_compute_version = None

    for bdm in block_device_mappings:
        # NOTE(vish): For now, just make sure the volumes are accessible.
        # Additionally, check that the volume can be attached to this
        # instance.
        snapshot_id = bdm.snapshot_id
        volume_id = bdm.volume_id
        image_id = bdm.image_id
        if (image_id is not None and
                image_id != instance.get('image_ref')):
            try:
                self._get_image(context, image_id)
            except Exception:
                raise exception.InvalidBDMImage(id=image_id)
            if (bdm.source_type == 'image' and
                    bdm.destination_type == 'volume' and
                    not bdm.volume_size):
                raise exception.InvalidBDM(message=_("Images with "
                    "destination_type 'volume' need to have a non-zero "
                    "size specified"))
        elif volume_id is not None:
            # The instance is being created and we don't know which
            # cell it's going to land in, so check all cells.
            if min_compute_version is None:
                min_compute_version = \
                    objects.service.get_minimum_version_all_cells(
                        context, ['nova-compute'])
            try:
                # NOTE(ildikov): The boot from volume operation did not
                # reserve the volume before Pike and as the older computes
                # are running 'check_attach' which will fail if the volume
                # is in 'attaching' state; if the compute service version
                # is not high enough we will just perform the old check as
                # opposed to reserving the volume here.
                volume = self.volume_api.get(context, volume_id)
                if (min_compute_version >=
                        BFV_RESERVE_MIN_COMPUTE_VERSION):
                    self._check_attach_and_reserve_volume(
                        context, volume, instance, bdm,
                        supports_multiattach)
                else:
                    # NOTE(ildikov): This call is here only for backward
                    # compatibility can be removed after Ocata EOL.
                    self._check_attach(context, volume, instance)
                bdm.volume_size = volume.get('size')

                # NOTE(mnaser): If we end up reserving the volume, it will
                #               not have an attachment_id which is needed
                #               for cleanups. This can be removed once
                #               all calls to reserve_volume are gone.
                if 'attachment_id' not in bdm:
                    bdm.attachment_id = None
            except (exception.CinderConnectionFailed,
                    exception.InvalidVolume,
                    exception.MultiattachNotSupportedOldMicroversion,
                    exception.MultiattachSupportNotYetAvailable):
                # These already describe the failure; pass them on as-is.
                raise
            except exception.InvalidInput as exc:
                raise exception.InvalidVolume(reason=exc.format_message())
            except Exception:
                raise exception.InvalidBDMVolume(id=volume_id)
        elif snapshot_id is not None:
            try:
                snap = self.volume_api.get_snapshot(context, snapshot_id)
                bdm.volume_size = bdm.volume_size or snap.get('size')
            except exception.CinderConnectionFailed:
                raise
            except Exception:
                raise exception.InvalidBDMSnapshot(id=snapshot_id)
        elif (bdm.source_type == 'blank' and
                bdm.destination_type == 'volume' and
                not bdm.volume_size):
            raise exception.InvalidBDM(message=_("Blank volumes "
                "(source: 'blank', dest: 'volume') need to have non-zero "
                "size"))

    # An ephemeral mapping without a size counts as the full flavor
    # ephemeral size.
    ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
                         for bdm in block_device_mappings
                         if block_device.new_format_is_ephemeral(bdm))
    if ephemeral_size > instance_type['ephemeral_gb']:
        raise exception.InvalidBDMEphemeralSize()

    # There should be only one swap
    swap_list = block_device.get_bdm_swap_list(block_device_mappings)
    if len(swap_list) > 1:
        msg = _("More than one swap drive requested.")
        raise exception.InvalidBDMFormat(details=msg)

    if swap_list:
        swap_size = swap_list[0].volume_size or 0
        if swap_size > instance_type['swap']:
            raise exception.InvalidBDMSwapSize()

    # A negative max_local_block_devices disables the limit check.
    max_local = CONF.max_local_block_devices
    if max_local >= 0:
        num_local = len([bdm for bdm in block_device_mappings
                         if bdm.destination_type == 'local'])
        if num_local > max_local:
            raise exception.InvalidBDMLocalsLimit()
def _check_attach(self, context, volume, instance):
# TODO(ildikov): This check_attach code is kept only for backward
# compatibility and should be removed after Ocata EOL.
if volume['status'] != 'available':
msg = _("volume '%(vol)s' status must be 'available'. Currently "
"in '%(status)s'") % {'vol': volume['id'],
'status': volume['status']}
raise exception.InvalidVolume(reason=msg)
if volume['attach_status'] == 'attached':
msg = _("volume %s already attached") % volume['id']
raise exception.InvalidVolume(reason=msg)
self.volume_api.check_availability_zone(context, volume,
instance=instance)
def _populate_instance_names(self, instance, num_instances, index):
"""Populate instance display_name and hostname.
:param instance: The instance to set the display_name, hostname for
:type instance: nova.objects.Instance
:param num_instances: Total number of instances being created in this
request
:param index: The 0-based index of this particular instance
"""
# NOTE(mriedem): This is only here for test simplicity since a server
# name is required in the REST API.
if 'display_name' not in instance or instance.display_name is None:
instance.display_name = 'Server %s' % instance.uuid
# if we're booting multiple instances, we need to add an indexing
# suffix to both instance.hostname and instance.display_name. This is
# not necessary for a single instance.
if num_instances == 1:
default_hostname = 'Server-%s' % instance.uuid
instance.hostname = utils.sanitize_hostname(
instance.display_name, default_hostname)
elif num_instances > 1 and self.cell_type != 'api':
old_display_name = instance.display_name
new_display_name = '%s-%d' % (old_display_name, index + 1)
if utils.sanitize_hostname(old_display_name) == "":
instance.hostname = 'Server-%s' % instance.uuid
else:
instance.hostname = utils.sanitize_hostname(
new_display_name)
instance.display_name = new_display_name
def _populate_instance_for_create(self, context, instance, image,
                                  index, security_groups, instance_type,
                                  num_instances, shutdown_terminate):
    """Fill in the initial state of a new instance.

    Nothing is saved here; the instance object is only populated.

    :param context: The request context
    :param instance: The instance object to populate
    :param image: image metadata used to derive system metadata
    :param index: launch index of this instance within the request
    :param security_groups: security groups stored on the instance when
        neutron is not in use
    :param instance_type: The flavor for the instance
    :param num_instances: number of instances in the request
    :param shutdown_terminate: value for instance.shutdown_terminate
    :returns: the populated instance
    """
    instance.launch_index = index
    instance.vm_state = vm_states.BUILDING
    instance.task_state = task_states.SCHEDULING

    # Start out with an empty network info cache.
    info_cache = objects.InstanceInfoCache()
    info_cache.instance_uuid = instance.uuid
    info_cache.network_info = network_model.NetworkInfo()
    instance.info_cache = info_cache

    instance.flavor = instance_type
    instance.old_flavor = None
    instance.new_flavor = None

    ephemeral_key_uuid = None
    if CONF.ephemeral_storage_encryption.enabled:
        # NOTE(kfarr): dm-crypt expects the cipher in a
        # hyphenated format: cipher-chainmode-ivmode
        # (ex: aes-xts-plain64). The algorithm needs
        # to be parsed out to pass to the key manager (ex: aes).
        cipher = CONF.ephemeral_storage_encryption.cipher
        algorithm = None
        if cipher:
            algorithm = cipher.split('-')[0]
        ephemeral_key_uuid = self.key_manager.create_key(
            context,
            algorithm=algorithm,
            length=CONF.ephemeral_storage_encryption.key_size)
    instance.ephemeral_key_uuid = ephemeral_key_uuid

    # Keep the image properties around for later use (notifications,
    # etc.); only what can be stored is stored.
    if not instance.obj_attr_is_set('system_metadata'):
        instance.system_metadata = {}
    # instance_update needs system_metadata in dict form.
    instance.system_metadata = utils.instance_sys_meta(instance)

    image_sys_meta = utils.get_system_metadata_from_image(
        image, instance_type)
    # Fall back to the instance's own image when the image metadata
    # supplied no base image.
    image_sys_meta.setdefault('image_base_image_ref', instance.image_ref)
    image_sys_meta['owner_user_name'] = context.user_name
    image_sys_meta['owner_project_name'] = context.project_name
    instance.system_metadata.update(image_sys_meta)

    # With neutron nothing is stored in the database; the security
    # groups are proxied from the ports attached to the instance.
    instance.security_groups = (
        objects.SecurityGroupList() if CONF.use_neutron
        else security_groups)

    self._populate_instance_names(instance, num_instances, index)
    instance.shutdown_terminate = shutdown_terminate
    return instance
def _create_tag_list_obj(self, context, tags):
    """Wrap plain string tags in a TagList object.

    :param context: security context.
    :param tags: simple string tags from API request.
    :returns: TagList object.
    """
    return objects.TagList(
        objects=[objects.Tag(context=context, tag=name) for name in tags])
def _transform_tags(self, tags, resource_id):
"""Change the resource_id of the tags according to the input param.
Because this method can be called multiple times when more than one
instance is booted in a single request it makes a copy of the tags
list.
:param tags: TagList object.
:param resource_id: string.
:returns: TagList object.
"""
instance_tags = tags.obj_clone()
for tag in instance_tags:
tag.resource_id = resource_id
return instance_tags
# This method remains because cellsv1 uses it in the scheduler
def create_db_entry_for_new_instance(self, context, instance_type, image,
        instance, security_group, block_device_mapping, num_instances,
        index, shutdown_terminate=False, create_instance=True):
    """Populate a new instance and, optionally, create its DB entry.

    This is called by the scheduler after a location for the
    instance has been determined.

    :param create_instance: Determines if the instance is created here or
        just populated for later creation. This is done so that this code
        can be shared with cellsv1 which needs the instance creation to
        happen here. It should be removed and this method cleaned up when
        cellsv1 is a distant memory.
    :returns: the populated instance
    """
    self._populate_instance_for_create(
        context, instance, image, index, security_group, instance_type,
        num_instances, shutdown_terminate)

    if create_instance:
        instance.create()
    return instance
def _check_multiple_instances_with_neutron_ports(self,
requested_networks):
"""Check whether multiple instances are created from port id(s)."""
for requested_net in requested_networks:
if requested_net.port_id:
msg = _("Unable to launch multiple instances with"
" a single configured port ID. Please launch your"
" instance one by one with different ports.")
raise exception.MultiplePortsNotApplicable(reason=msg)
def _check_multiple_instances_with_specified_ip(self, requested_networks):
"""Check whether multiple instances are created with specified ip."""
for requested_net in requested_networks:
if requested_net.network_id and requested_net.address:
msg = _("max_count cannot be greater than 1 if an fixed_ip "
"is specified.")
raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
@hooks.add_hook("create_instance")
def create(self, context, instance_type,
           image_href, kernel_id=None, ramdisk_id=None,
           min_count=None, max_count=None,
           display_name=None, display_description=None,
           key_name=None, key_data=None, security_groups=None,
           availability_zone=None, forced_host=None, forced_node=None,
           user_data=None, metadata=None, injected_files=None,
           admin_password=None, block_device_mapping=None,
           access_ip_v4=None, access_ip_v6=None, requested_networks=None,
           config_drive=None, auto_disk_config=None, scheduler_hints=None,
           legacy_bdm=True, shutdown_terminate=False,
           check_server_group_quota=False, tags=None,
           supports_multiattach=False, trusted_certs=None):
    """Provision instances and pass them on to be scheduled and built.

    Multi-instance network restrictions and the requested availability
    zone are checked first, then the scheduler filter properties are
    built and the request is handed to _create_instance.

    Returns a tuple of (instances, reservation_id)

    :raises: InvalidRequest if an availability zone is requested that is
        not available and no host is forced
    """
    # Fixed IPs and (with neutron) pre-created ports cannot be shared
    # by several instances.
    if requested_networks:
        if max_count is not None and max_count > 1:
            self._check_multiple_instances_with_specified_ip(
                requested_networks)
            if utils.is_neutron():
                self._check_multiple_instances_with_neutron_ports(
                    requested_networks)

    if availability_zone:
        available_zones = availability_zones.get_availability_zones(
            context.elevated(), True)
        zone_unknown = availability_zone not in available_zones
        if forced_host is None and zone_unknown:
            msg = _('The requested availability zone is not available')
            raise exception.InvalidRequest(msg)

    filter_properties = scheduler_utils.build_filter_properties(
        scheduler_hints, forced_host, forced_node, instance_type)

    return self._create_instance(
        context, instance_type,
        image_href, kernel_id, ramdisk_id,
        min_count, max_count,
        display_name, display_description,
        key_name, key_data, security_groups,
        availability_zone, user_data, metadata,
        injected_files, admin_password,
        access_ip_v4, access_ip_v6,
        requested_networks, config_drive,
        block_device_mapping, auto_disk_config,
        filter_properties=filter_properties,
        legacy_bdm=legacy_bdm,
        shutdown_terminate=shutdown_terminate,
        check_server_group_quota=check_server_group_quota,
        tags=tags, supports_multiattach=supports_multiattach,
        trusted_certs=trusted_certs)
def _check_auto_disk_config(self, instance=None, image=None,
**extra_instance_updates):
auto_disk_config = extra_instance_updates.get("auto_disk_config")
if auto_disk_config is None:
return
if not image and not instance:
return
if image:
image_props = image.get("properties", {})
auto_disk_config_img = \
utils.get_auto_disk_config_from_image_props(image_props)
image_ref = image.get("id")
else:
sys_meta = utils.instance_sys_meta(instance)
image_ref = sys_meta.get('image_base_image_ref')
auto_disk_config_img = \
utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
auto_disk_config,
image_ref)
def _lookup_instance(self, context, uuid):
    """Helper method for pulling an instance object from a database.

    During the transition to cellsv2 there is some complexity around
    retrieving an instance from the database which this method hides. If
    there is an instance mapping then query the cell for the instance, if
    no mapping exists then query the configured nova database.

    Once we are past the point that all deployments can be assumed to be
    migrated to cellsv2 this method can go away.

    :param context: The request context
    :param uuid: UUID of the instance to look up
    :returns: tuple of (cell mapping, instance). The cell mapping is
        None when the instance was read from the configured database,
        and both are None when the instance was not found.
    """
    try:
        inst_map = objects.InstanceMapping.get_by_instance_uuid(
            context, uuid)
    except exception.InstanceMappingNotFound:
        # TODO(alaski): This exception block can be removed once we're
        # guaranteed everyone is using cellsv2.
        inst_map = None

    # Fall back to the configured database when:
    # - there is no mapping, i.e. the deployment has not migrated to
    #   cellsv2 yet;
    # - the mapping has no cell, i.e. the instance is not in a cell
    #   yet. Until instance creation moves to the conductor the
    #   instance can be found in the configured database;
    # - cellsv1 is in use, where the cells messaging path can't yet be
    #   short-circuited.
    query_configured_db = (inst_map is None or
                           inst_map.cell_mapping is None or
                           CONF.cells.enable)
    if query_configured_db:
        try:
            return None, objects.Instance.get_by_uuid(context, uuid)
        except exception.InstanceNotFound:
            # The conductor is in charge of writing the instance to the
            # database and hasn't done that yet. It's up to the caller
            # to decide what to do with that information.
            return None, None

    cell = inst_map.cell_mapping
    with nova_context.target_cell(context, cell) as cctxt:
        try:
            instance = objects.Instance.get_by_uuid(cctxt, uuid)
        except exception.InstanceNotFound:
            # Since the cell_mapping exists we know the instance is in
            # the cell, however InstanceNotFound means it's already
            # deleted.
            return None, None
    return cell, instance
def _delete_while_booting(self, context, instance):
    """Handle deletion if the instance has not reached a cell yet

    Deletion before an instance reaches a cell needs to be handled
    differently. What we're attempting to do is delete the BuildRequest
    before the api level conductor does. If we succeed here then the boot
    request stops before reaching a cell. If not then the instance will
    need to be looked up in a cell db and the normal delete path taken.

    :param context: The request context
    :param instance: The instance being deleted
    :returns: True if the delete was fully handled here, False if the
        normal delete path still has to run
    """
    deleted = self._attempt_delete_of_buildrequest(context, instance)

    # After service version 15 deletion of the BuildRequest will halt the
    # build process in the conductor. In that case run the rest of this
    # method and consider the instance deleted. If we have not yet reached
    # service version 15 then just return False so the rest of the delete
    # process will proceed usually.
    service_version = objects.Service.get_minimum_version(
        context, 'nova-osapi_compute')
    if service_version < 15:
        return False
    if not deleted:
        return False

    # The BuildRequest was deleted, which tells the conductor to halt
    # the build.
    # NOTE(alaski): Though the conductor halts the build process it
    # does not currently delete the instance record. This is
    # because in the near future the instance record will not be
    # created if the buildrequest has been deleted here. For now we
    # ensure the instance has been set to deleted at this point.
    # Yes this directly contradicts the comment earlier in this
    # method, but this is a temporary measure.
    # The instance passed in was stashed on the buildrequest and is
    # therefore not complete enough to run .destroy(), so look up the
    # real one.
    try:
        cell, db_instance = self._lookup_instance(context, instance.uuid)
        # A lookup result of None means it has already been deleted.
        if db_instance is not None:
            if not cell:
                db_instance.destroy()
            else:
                with nova_context.target_cell(context, cell) as cctxt:
                    # FIXME: When the instance context is targeted,
                    # we can remove this
                    with compute_utils.notify_about_instance_delete(
                            self.notifier, cctxt, db_instance):
                        db_instance.destroy()
    except exception.InstanceNotFound:
        pass
    return True
def _attempt_delete_of_buildrequest(self, context, instance):
    """Try to delete the BuildRequest belonging to an instance.

    If there is a BuildRequest then the instance may not have been
    written to a cell db yet. Deleting the BuildRequest indicates that
    the instance build should not proceed.

    :param context: The request context
    :param instance: The instance whose BuildRequest is to be deleted
    :returns: True if the BuildRequest was deleted here, False if it
        no longer existed
    """
    try:
        build_req = objects.BuildRequest.get_by_instance_uuid(
            context, instance.uuid)
        build_req.destroy()
    except exception.BuildRequestNotFound:
        # This means that conductor has deleted the BuildRequest so the
        # instance is now in a cell and the delete needs to proceed
        # normally.
        return False
    else:
        # Detach from any volumes so that they aren't orphaned.
        requested_bdms = build_req.block_device_mappings
        self._local_cleanup_bdm_volumes(requested_bdms, instance, context)
        return True
def _delete(self, context, instance, delete_type, cb, **instance_attrs):
    """Delete an instance, picking the path that matches its state.

    Depending on what is known about the instance this either deletes it
    while it is still booting, destroys a host-less record directly,
    hands the delete to ``cb``, or performs a local delete when the
    instance has no host or its compute service is down or not found.

    :param context: the request context
    :param instance: the instance to delete; it may be replaced by a
        freshly looked-up copy while this method runs
    :param delete_type: the kind of delete requested, e.g. 'delete',
        'soft_delete' or 'force_delete'; 'soft_delete' is reported as
        'delete' in the delete notification sent from here
    :param cb: callback invoked as ``cb(context, instance, bdms)``; it is
        also passed on to ``_local_delete`` for the local delete path
    :param instance_attrs: attribute values applied to the instance via
        ``instance.update`` and saved before the delete proceeds
    """
    if instance.disable_terminate:
        LOG.info('instance termination disabled', instance=instance)
        return

    cell = None
    # If there is an instance.host (or the instance is shelved-offloaded or
    # in error state), the instance has been scheduled and sent to a
    # cell/compute which means it was pulled from the cell db.
    # Normal delete should be attempted.
    may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
        instance)
    if not instance.host and not may_have_ports_or_volumes:
        try:
            if self._delete_while_booting(context, instance):
                return
            # If instance.host was not set it's possible that the Instance
            # object here was pulled from a BuildRequest object and is not
            # fully populated. Notably it will be missing an 'id' field
            # which will prevent instance.destroy from functioning
            # properly. A lookup is attempted which will either return a
            # full Instance or None if not found. If not found then it's
            # acceptable to skip the rest of the delete processing.
            cell, instance = self._lookup_instance(context, instance.uuid)
            if cell and instance:
                try:
                    # Now destroy the instance from the cell it lives in.
                    with compute_utils.notify_about_instance_delete(
                            self.notifier, context, instance):
                        instance.destroy()
                except exception.InstanceNotFound:
                    pass
                # The instance was deleted or is already gone.
                return
            if not instance:
                # Instance is already deleted.
                return
        except exception.ObjectActionError:
            # NOTE(melwitt): This means the instance.host changed
            # under us indicating the instance became scheduled
            # during the destroy(). Refresh the instance from the DB and
            # continue on with the delete logic for a scheduled instance.
            # NOTE(danms): If instance.host is set, we should be able to
            # do the following lookup. If not, there's not much we can
            # do to recover.
            cell, instance = self._lookup_instance(context, instance.uuid)
            if not instance:
                # Instance is already deleted
                return

    bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
        context, instance.uuid)

    # In these states the instance has an associated snapshot; deleting it
    # is best effort and failures are only logged.
    if instance.vm_state in (vm_states.SHELVED,
                             vm_states.SHELVED_OFFLOADED):
        snapshot_id = instance.system_metadata.get('shelved_image_id')
        LOG.info("Working on deleting snapshot %s "
                 "from shelved instance...",
                 snapshot_id, instance=instance)
        try:
            self.image_api.delete(context, snapshot_id)
        except (exception.ImageNotFound,
                exception.ImageNotAuthorized) as exc:
            LOG.warning("Failed to delete snapshot "
                        "from shelved instance (%s).",
                        exc.format_message(), instance=instance)
        except Exception:
            LOG.exception("Something wrong happened when trying to "
                          "delete snapshot from shelved instance.",
                          instance=instance)

    # Remember the task state from before instance_attrs overwrite it so a
    # delete that is already in progress can be detected further down.
    original_task_state = instance.task_state
    try:
        # NOTE(maoy): no expected_task_state needs to be set
        instance.update(instance_attrs)
        instance.progress = 0
        instance.save()

        # NOTE(dtp): cells.enable = False means "use cells v2".
        # Run everywhere except v1 compute cells.
        if not CONF.cells.enable or self.cell_type == 'api':
            # TODO(melwitt): In Rocky, we store console authorizations
            # in both the consoleauth service and the database while
            # we convert to using the database. Remove the consoleauth
            # line below when authorizations are no longer being
            # stored in consoleauth, in Stein.
            self.consoleauth_rpcapi.delete_tokens_for_instance(
                context, instance.uuid)

        if self.cell_type == 'api':
            # NOTE(comstud): If we're in the API cell, we need to
            # skip all remaining logic and just call the callback,
            # which will cause a cast to the child cell.
            cb(context, instance, bdms)
            return
        if not instance.host and not may_have_ports_or_volumes:
            try:
                with compute_utils.notify_about_instance_delete(
                        self.notifier, context, instance,
                        delete_type
                        if delete_type != 'soft_delete'
                        else 'delete'):
                    instance.destroy()
                LOG.info('Instance deleted and does not have host '
                         'field, its vm_state is %(state)s.',
                         {'state': instance.vm_state},
                         instance=instance)
                return
            except exception.ObjectActionError as ex:
                # The instance's host likely changed under us as
                # this instance could be building and has since been
                # scheduled. Continue with attempts to delete it.
                LOG.debug('Refreshing instance because: %s', ex,
                          instance=instance)
                instance.refresh()

        if instance.vm_state == vm_states.RESIZED:
            self._confirm_resize_on_deleting(context, instance)

        # Assume a local delete unless the compute service proves to be up.
        is_local_delete = True
        try:
            # instance.host must be set in order to look up the service.
            if instance.host is not None:
                service = objects.Service.get_by_compute_host(
                    context.elevated(), instance.host)
                is_local_delete = not self.servicegroup_api.service_is_up(
                    service)
            if not is_local_delete:
                if original_task_state in (task_states.DELETING,
                                           task_states.SOFT_DELETING):
                    LOG.info('Instance is already in deleting state, '
                             'ignoring this request',
                             instance=instance)
                    return
                self._record_action_start(context, instance,
                                          instance_actions.DELETE)

                cb(context, instance, bdms)
        except exception.ComputeHostNotFound:
            LOG.debug('Compute host %s not found during service up check, '
                      'going to local delete instance', instance.host,
                      instance=instance)

        if is_local_delete:
            # If instance is in shelved_offloaded state or compute node
            # isn't up, delete instance from db and clean bdms info and
            # network info
            if cell is None:
                # NOTE(danms): If we didn't get our cell from one of the
                # paths above, look it up now.
                try:
                    im = objects.InstanceMapping.get_by_instance_uuid(
                        context, instance.uuid)
                    cell = im.cell_mapping
                except exception.InstanceMappingNotFound:
                    LOG.warning('During local delete, failed to find '
                                'instance mapping', instance=instance)
                    return

            LOG.debug('Doing local delete in cell %s', cell.identity,
                      instance=instance)
            with nova_context.target_cell(context, cell) as cctxt:
                self._local_delete(cctxt, instance, bdms, delete_type, cb)

    except exception.InstanceNotFound:
        # NOTE(comstud): Race condition. Instance already gone.
        pass
def _confirm_resize_on_deleting(self, context, instance):
    """Confirm an unconfirmed resize so the original instance is cleaned up.

    Looks for a migration of the instance in 'finished' and then
    'confirming' status. When one is found a CONFIRM_RESIZE action is
    recorded and ``confirm_resize`` is called (with ``cast=False``) for
    the migration's source compute; otherwise nothing further is done.

    :param context: the request context
    :param instance: the instance being deleted
    """
    migration = None
    for wanted_status in ('finished', 'confirming'):
        try:
            migration = objects.Migration.get_by_instance_and_status(
                context.elevated(), instance.uuid, wanted_status)
        except exception.MigrationNotFoundByStatus:
            # Nothing in this status; try the next one.
            continue
        LOG.info('Found an unconfirmed migration during delete, '
                 'id: %(id)s, status: %(status)s',
                 {'id': migration.id,
                  'status': migration.status},
                 instance=instance)
        break

    if not migration:
        LOG.info('Instance may have been confirmed during delete',
                 instance=instance)
        return

    source_host = migration.source_compute
    self._record_action_start(
        context, instance, instance_actions.CONFIRM_RESIZE)
    self.compute_rpcapi.confirm_resize(
        context, instance, migration, source_host, cast=False)
def _local_cleanup_bdm_volumes(self, bdms, instance, context):
    """Clean up the volumes behind ``bdms`` and destroy the bdm records.

    For every volume bdm the attachment is deleted, or, when the bdm has
    no attachment id, the connection is terminated (if a stashed
    connector exists) and the volume is detached. Volumes flagged
    ``delete_on_termination`` are deleted as well. Any failure during the
    volume cleanup is logged and ignored.

    :param bdms: iterable of block device mappings to clean up
    :param instance: the instance the mappings belong to
    :param context: the request context; an elevated copy is used for the
        detach call
    """
    admin_context = context.elevated()
    for mapping in bdms:
        if mapping.is_volume:
            try:
                if not mapping.attachment_id:
                    stashed = compute_utils.get_stashed_volume_connector(
                        mapping, instance)
                    if not stashed:
                        LOG.debug('Unable to find connector for volume %s,'
                                  ' not attempting terminate_connection.',
                                  mapping.volume_id, instance=instance)
                    else:
                        self.volume_api.terminate_connection(
                            context, mapping.volume_id, stashed)
                    # Attempt the detach regardless; if no connection was
                    # ever made this only cleans up the volume state in
                    # the Cinder DB.
                    self.volume_api.detach(
                        admin_context, mapping.volume_id, instance.uuid)
                else:
                    self.volume_api.attachment_delete(
                        context, mapping.attachment_id)

                if mapping.delete_on_termination:
                    self.volume_api.delete(context, mapping.volume_id)
            except Exception as exc:
                LOG.warning("Ignoring volume cleanup failure due to %s",
                            exc, instance=instance)
        # When the server was deleted while its BuildRequest still existed
        # the instance was never created in a cell, so there is no BDM
        # row in the DB to destroy.
        if 'id' in mapping:
            mapping.destroy()
def _local_delete(self, context, instance, bdms, delete_type, cb):
    """Delete an instance from the database without its compute host.

    Inside a delete notification this deallocates the instance's network
    (unless running as an 'api' cell), cleans up its volumes, removes its
    allocations in Placement, invokes ``cb`` with ``local=True`` and
    finally destroys the instance record.

    :param context: the request context
    :param instance: the instance to delete
    :param bdms: the instance's block device mappings
    :param delete_type: requested delete type; 'soft_delete' is notified
        as 'delete'
    :param cb: callback invoked as ``cb(context, instance, bdms,
        local=True)``
    """
    if instance.vm_state != vm_states.SHELVED_OFFLOADED:
        LOG.warning("instance's host %s is down, deleting from "
                    "database", instance.host, instance=instance)
    else:
        LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
                 " the instance's info from database.",
                 instance=instance)

    notify_type = 'delete' if delete_type == 'soft_delete' else delete_type
    with compute_utils.notify_about_instance_delete(
            self.notifier, context, instance, notify_type):
        elevated = context.elevated()
        if self.cell_type != 'api':
            # NOTE(liusheng): In nova-network multi_host scenario,deleting
            # network info of the instance may need instance['host'] as
            # destination host of RPC call. If instance in
            # SHELVED_OFFLOADED state, instance['host'] is None, here, use
            # shelved_host as host to deallocate network info and reset
            # instance['host'] after that. Here we shouldn't use
            # instance.save(), because this will mislead user who may think
            # the instance's host has been changed, and actually, the
            # instance.host is always None.
            saved_host = instance.host
            try:
                if instance.vm_state == vm_states.SHELVED_OFFLOADED:
                    sysmeta = getattr(
                        instance,
                        obj_base.get_attrname('system_metadata'))
                    instance.host = sysmeta.get('shelved_host')
                self.network_api.deallocate_for_instance(elevated,
                                                         instance)
            finally:
                # Always put the original host value back.
                instance.host = saved_host

        # Volumes next.
        self._local_cleanup_bdm_volumes(bdms, instance, context)
        # Allocations in Placement have to be removed here because the
        # compute service cannot do it.
        self.placementclient.delete_allocation_for_instance(
            context, instance.uuid)
        cb(context, instance, bdms, local=True)
        instance.destroy()
def _do_delete(self, context, instance, bdms, local=False):
    """Delete callback: terminate via compute, or mark deleted locally.

    :param context: the request context
    :param instance: the instance being deleted
    :param bdms: the instance's block device mappings
    :param local: when True only the instance record is updated and
        saved; otherwise the compute RPC API is asked to terminate the
        instance with delete_type 'delete'
    """
    if not local:
        self.compute_rpcapi.terminate_instance(
            context, instance, bdms, delete_type='delete')
        return

    instance.vm_state = vm_states.DELETED
    instance.task_state = None
    instance.terminated_at = timeutils.utcnow()
    instance.save()
def _do_force_delete(self, context, instance, bdms, local=False):
    """Force-delete callback: terminate via compute, or mark deleted locally.

    :param context: the request context
    :param instance: the instance being deleted
    :param bdms: the instance's block device mappings
    :param local: when True only the instance record is updated and
        saved; otherwise the compute RPC API is asked to terminate the
        instance with delete_type 'force_delete'
    """
    if not local:
        self.compute_rpcapi.terminate_instance(
            context, instance, bdms, delete_type='force_delete')
        return

    instance.vm_state = vm_states.DELETED
    instance.task_state = None
    instance.terminated_at = timeutils.utcnow()
    instance.save()
def _do_soft_delete(self, context, instance, bdms, local=False):
    """Soft-delete callback: ask compute, or mark soft-deleted locally.

    :param context: the request context
    :param instance: the instance being soft deleted
    :param bdms: the instance's block device mappings (not used here)
    :param local: when True only the instance record is updated and
        saved; otherwise the compute RPC API is asked to soft delete the
        instance
    """
    if not local:
        self.compute_rpcapi.soft_delete_instance(context, instance)
        return

    instance.vm_state = vm_states.SOFT_DELETED
    instance.task_state = None
    instance.terminated_at = timeutils.utcnow()
    instance.save()
# NOTE(maoy): we allow delete to be called no matter what vm_state says.
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=None, task_state=None,
                      must_have_launched=True)
def soft_delete(self, context, instance):
    """Soft delete an instance.

    Runs the common delete logic with the soft-delete callback, setting
    the task state to SOFT_DELETING and ``deleted_at`` to the current
    time.

    :param context: the request context
    :param instance: the instance to soft delete
    """
    LOG.debug('Going to try to soft delete instance',
              instance=instance)

    soft_delete_attrs = {
        'task_state': task_states.SOFT_DELETING,
        'deleted_at': timeutils.utcnow(),
    }
    self._delete(context, instance, 'soft_delete', self._do_soft_delete,
                 **soft_delete_attrs)
def _delete_instance(self, context, instance):
    """Run the common delete logic for a regular 'delete' request.

    :param context: the request context
    :param instance: the instance to delete
    """
    delete_attrs = {'task_state': task_states.DELETING}
    self._delete(context, instance, 'delete', self._do_delete,
                 **delete_attrs)
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=None, task_state=None,
                      must_have_launched=False)
def delete(self, context, instance):
    """Terminate an instance.

    :param context: the request context
    :param instance: the instance to terminate
    """
    LOG.debug("Going to try to terminate instance", instance=instance)
    # The actual work is shared with other callers of _delete_instance.
    self._delete_instance(context, instance)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.SOFT_DELETED])
def restore(self, context, instance):
    """Restore a previously deleted (but not reclaimed) instance.

    Quota is checked first. An instance with a host is put into the
    RESTORING task state and handed to compute; an instance without a
    host is simply set back to ACTIVE in the database.

    :param context: the request context
    :param instance: the soft-deleted instance to restore
    """
    # Check quotas
    flavor = instance.get_flavor()
    project_id, user_id = quotas_obj.ids_from_instance(context, instance)
    compute_utils.check_num_instances_quota(
        context, flavor, 1, 1, project_id=project_id, user_id=user_id)

    self._record_action_start(context, instance, instance_actions.RESTORE)

    restore_on_compute = bool(instance.host)
    if restore_on_compute:
        instance.task_state = task_states.RESTORING
    else:
        instance.vm_state = vm_states.ACTIVE
        instance.task_state = None
    instance.deleted_at = None
    instance.save(expected_task_state=[None])

    if restore_on_compute:
        # TODO(melwitt): We're not rechecking for strict quota here to
        # guard against going over quota during a race at this time because
        # the resource consumption for this operation is written to the
        # database by compute.
        self.compute_rpcapi.restore_instance(context, instance)
@check_instance_lock
@check_instance_state(task_state=None,
                      must_have_launched=False)
def force_delete(self, context, instance):
    """Force delete an instance in any vm_state/task_state.

    :param context: the request context
    :param instance: the instance to force delete
    """
    force_delete_attrs = {'task_state': task_states.DELETING}
    self._delete(context, instance, 'force_delete', self._do_force_delete,
                 **force_delete_attrs)
def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
    """Stop an instance without the decorator checks that ``stop`` applies.

    Puts the instance into the POWERING_OFF task state (the save expects
    no task state to be set), records a STOP action and asks compute to
    stop the instance.

    :param context: the request context
    :param instance: the instance to stop
    :param do_cast: forwarded to the compute RPC API
    :param clean_shutdown: forwarded to the compute RPC API
    """
    LOG.debug("Going to try to stop instance", instance=instance)

    instance.task_state = task_states.POWERING_OFF
    instance.progress = 0
    instance.save(expected_task_state=[None])

    self._record_action_start(context, instance, instance_actions.STOP)

    self.compute_rpcapi.stop_instance(
        context, instance, do_cast=do_cast, clean_shutdown=clean_shutdown)
@check_instance_lock
@check_instance_host
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
def stop(self, context, instance, do_cast=True, clean_shutdown=True):
    """Stop an instance.

    :param context: the request context
    :param instance: the instance to stop
    :param do_cast: passed through to ``force_stop``
    :param clean_shutdown: passed through to ``force_stop``
    """
    # The decorators above do the validation; force_stop does the work.
    self.force_stop(context, instance, do_cast, clean_shutdown)
@check_instance_lock
@check_instance_host
@check_instance_cell
@check_instance_state(vm_state=[vm_states.STOPPED])
def start(self, context, instance):
    """Start an instance.

    Puts the instance into the POWERING_ON task state (the save expects
    no task state to be set), records a START action and asks compute to
    start the instance.

    :param context: the request context
    :param instance: the stopped instance to start
    """
    LOG.debug("Going to try to start instance", instance=instance)

    instance.task_state = task_states.POWERING_ON
    instance.save(expected_task_state=[None])

    self._record_action_start(context, instance, instance_actions.START)

    # TODO(yamahata): injected_files isn't supported right now.
    # It is used only for osapi. not for ec2 api.
    # availability_zone isn't used by run_instance.
    self.compute_rpcapi.start_instance(context, instance)
@check_instance_lock
@check_instance_host
@check_instance_cell
@check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
def trigger_crash_dump(self, context, instance):
    """Trigger crash dump in an instance.

    Records a TRIGGER_CRASH_DUMP action and forwards the request to the
    compute RPC API.

    :param context: the request context
    :param instance: the instance in which to trigger the crash dump
    """
    LOG.debug("Try to trigger crash dump", instance=instance)

    self._record_action_start(
        context, instance, instance_actions.TRIGGER_CRASH_DUMP)

    self.compute_rpcapi.trigger_crash_dump(context, instance)
def _get_instance_map_or_none(self, context, instance_uuid):
    """Return the InstanceMapping for ``instance_uuid``, or None.

    :param context: the request context
    :param instance_uuid: uuid of the instance whose mapping is wanted
    :returns: the InstanceMapping, or None when InstanceMappingNotFound
        is raised by the lookup
    """
    try:
        return objects.InstanceMapping.get_by_instance_uuid(
            context, instance_uuid)
    except exception.InstanceMappingNotFound:
        # A mapping should generally always exist. It can be missing when
        # a deployment has only partially migrated its nova-api services.
        return None
def _get_instance(self, context, instance_uuid, expected_attrs):
    """Look up an instance by uuid, honoring its cell mapping.

    :param context: the request context; it is targeted at the
        instance's cell when the instance mapping points at one
    :param instance_uuid: uuid of the instance to fetch
    :param expected_attrs: attribute names passed to the Instance lookup
    :returns: the Instance, or the instance stashed on the BuildRequest
        when the mapping has no cell yet
    :raises InstanceNotFound: if there is no instance mapping, or the
        mapping has no cell and the BuildRequest is gone
    """
    # Before service version 15 the BuildRequest is not cleaned up during
    # a delete request so there is no reason to look it up here as we can't
    # trust that it's not referencing a deleted instance. Also even if
    # there is an instance mapping we don't need to honor it for older
    # service versions.
    min_api_version = objects.Service.get_minimum_version(
        context, 'nova-osapi_compute')

    # On cells v1 the top-level merged replica has to be consulted
    # instead of the cell directly, so that case takes this path as well.
    if min_api_version < 15 or CONF.cells.enable:
        # If not using cells v1, we need to log a warning about the API
        # service version being less than 15 (that check was added in
        # newton), which indicates there is some lingering data during the
        # transition to cells v2 which could cause an InstanceNotFound
        # here. The warning message is a sort of breadcrumb.
        # This can all go away once we drop cells v1 and assert that all
        # deployments have upgraded from a base cells v2 setup with
        # mappings.
        if not CONF.cells.enable:
            LOG.warning('The nova-osapi_compute service version is from '
                        'before Ocata and may cause problems looking up '
                        'instances in a cells v2 setup. Check your '
                        'nova-api service configuration and cell '
                        'mappings. You may need to remove stale '
                        'nova-osapi_compute service records from the cell '
                        'database.')
        return objects.Instance.get_by_uuid(context, instance_uuid,
                                            expected_attrs=expected_attrs)

    mapping = self._get_instance_map_or_none(context, instance_uuid)
    if not mapping:
        raise exception.InstanceNotFound(instance_id=instance_uuid)

    if mapping.cell_mapping is not None:
        nova_context.set_target_cell(context, mapping.cell_mapping)
        return objects.Instance.get_by_uuid(
            context, instance_uuid, expected_attrs=expected_attrs)

    # No cell yet: the instance has not been scheduled and put in a cell.
    # For now it may also mean that the deployer has not created their
    # cell(s) yet.
    try:
        build_req = objects.BuildRequest.get_by_instance_uuid(
            context, instance_uuid)
        return build_req.instance
    except exception.BuildRequestNotFound:
        # Instance was mapped and the BuildRequest was deleted
        # while fetching. Try again.
        mapping = self._get_instance_map_or_none(context, instance_uuid)
        if not mapping or mapping.cell_mapping is None:
            raise exception.InstanceNotFound(instance_id=instance_uuid)
        nova_context.set_target_cell(context, mapping.cell_mapping)
        return objects.Instance.get_by_uuid(
            context, instance_uuid, expected_attrs=expected_attrs)
def get(self, context, instance_id, expected_attrs=None):
    """Get a single instance with the given instance_id.

    :param context: the request context
    :param instance_id: identifier of the instance; only UUID-like values
        are looked up
    :param expected_attrs: optional iterable of additional attribute
        names to load; it is copied, so the caller's object is never
        modified
    :returns: the instance returned by ``_get_instance``
    :raises InstanceNotFound: if ``instance_id`` is not UUID-like, or if
        the lookup raises InvalidID or InstanceNotFound
    """
    # Work on a private copy: extending the caller's list in place would
    # leak these default attributes into the caller's state and make them
    # pile up if the same list were reused across calls.
    expected_attrs = list(expected_attrs or [])
    expected_attrs.extend(['metadata', 'system_metadata',
                           'security_groups', 'info_cache'])
    # NOTE(ameade): we still need to support integer ids for ec2
    try:
        if uuidutils.is_uuid_like(instance_id):
            LOG.debug("Fetching instance by UUID",
                      instance_uuid=instance_id)
            instance = self._get_instance(context, instance_id,
                                          expected_attrs)
        else:
            LOG.debug("Failed to fetch instance by id %s", instance_id)
            raise exception.InstanceNotFound(instance_id=instance_id)
    except exception.InvalidID:
        LOG.debug("Invalid instance id %s", instance_id)
        raise exception.InstanceNotFound(instance_id=instance_id)

    return instance
def get_all(self, context, search_opts=None, limit=None, marker=None,
            expected_attrs=None, sort_keys=None, sort_dirs=None):
    """Get all instances filtered by one of the given parameters.

    If there is no filter and the context is an admin, it will retrieve
    all instances in the system.

    Deleted instances will be returned by default, unless there is a
    search option that says otherwise.

    The results will be sorted based on the list of sort keys in the
    'sort_keys' parameter (first value is primary sort key, second value is
    secondary sort key, etc.). For each sort key, the associated sort
    direction is based on the list of sort directions in the 'sort_dirs'
    parameter.

    :param context: the request context
    :param search_opts: dict of search options; known options are
        remapped to DB filter names, all others are copied as-is
    :param limit: maximum number of results, or None for no limit
    :param marker: pagination marker; it may refer to a build request or
        to an instance in a cell
    :param expected_attrs: extra attribute names to load in addition to
        'metadata', 'info_cache' and 'security_groups'
    :param sort_keys: list of sort keys
    :param sort_dirs: list of sort directions
    :returns: an InstanceList with build request instances first,
        followed by instances from the cells, without duplicate uuids
    """
    if search_opts is None:
        search_opts = {}

    LOG.debug("Searching by: %s", str(search_opts))

    # Fixups for the DB call
    filters = {}

    def _remap_flavor_filter(flavor_id):
        flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
        filters['instance_type_id'] = flavor.id

    def _remap_fixed_ip_filter(fixed_ip):
        # Turn fixed_ip into a regexp match. Since '.' matches
        # any character, we need to use regexp escaping for it.
        filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')

    # search_option to filter_name mapping.
    filter_mapping = {
        'image': 'image_ref',
        'name': 'display_name',
        'tenant_id': 'project_id',
        'flavor': _remap_flavor_filter,
        'fixed_ip': _remap_fixed_ip_filter}

    # copy from search_opts, doing various remappings as necessary
    for opt, value in search_opts.items():
        # Do remappings.
        # Values not in the filter_mapping table are copied as-is.
        # If the remapping is a string, it is the filter_name to use;
        # otherwise it is a callable that fills in ``filters`` itself.
        try:
            remap_object = filter_mapping[opt]
        except KeyError:
            filters[opt] = value
        else:
            # Remaps are strings to translate to, or functions to call
            # to do the translating as defined by the table above.
            if isinstance(remap_object, six.string_types):
                filters[remap_object] = value
            else:
                try:
                    remap_object(value)

                # We already know we can't match the filter, so
                # return an empty list
                except ValueError:
                    return objects.InstanceList()

    # IP address filtering cannot be applied at the DB layer, remove any DB
    # limit so that it can be applied after the IP filter.
    filter_ip = 'ip6' in filters or 'ip' in filters
    skip_build_request = False
    # Keep the caller's limit; it is applied after in-memory IP filtering.
    orig_limit = limit
    if filter_ip:
        # We cannot skip build requests if there is a marker since
        # the marker could be a build request.
        skip_build_request = marker is None
        if self.network_api.has_substr_port_filtering_extension(context):
            # We're going to filter by IP using Neutron so set filter_ip
            # to False so we don't attempt post-DB query filtering in
            # memory below.
            filter_ip = False
            instance_uuids = self._ip_filter_using_neutron(context,
                                                           filters)
            if instance_uuids:
                # Note that 'uuid' is not in the 2.1 GET /servers query
                # parameter schema, however, we allow additionalProperties
                # so someone could filter instances by uuid, which doesn't
                # make a lot of sense but we have to account for it.
                if 'uuid' in filters and filters['uuid']:
                    filter_uuids = filters['uuid']
                    if isinstance(filter_uuids, list):
                        instance_uuids.extend(filter_uuids)
                    else:
                        # Assume a string. If it's a dict or tuple or
                        # something, well...that's too bad. This is why
                        # we have query parameter schema definitions.
                        if filter_uuids not in instance_uuids:
                            instance_uuids.append(filter_uuids)
                filters['uuid'] = instance_uuids
            else:
                # No matches on the ip filter(s), return an empty list.
                return objects.InstanceList()
        elif limit:
            LOG.debug('Removing limit for DB query due to IP filter')
            limit = None

    # Skip get BuildRequest if filtering by IP address, as building
    # instances will not have IP addresses.
    if skip_build_request:
        build_requests = objects.BuildRequestList()
    else:
        # The ordering of instances will be
        # [sorted instances with no host] + [sorted instances with host].
        # This means BuildRequest and cell0 instances first, then cell
        # instances
        try:
            build_requests = objects.BuildRequestList.get_by_filters(
                context, filters, limit=limit, marker=marker,
                sort_keys=sort_keys, sort_dirs=sort_dirs)
            # If we found the marker in the build requests we need to set
            # it to None so we don't expect to find it in the cells below.
            marker = None
        except exception.MarkerNotFound:
            # If we didn't find the marker in the build requests then keep
            # looking for it in the cells.
            build_requests = objects.BuildRequestList()

    build_req_instances = objects.InstanceList(
        objects=[build_req.instance for build_req in build_requests])
    # Only subtract from limit if it is not None
    limit = (limit - len(build_req_instances)) if limit else limit

    # We could arguably avoid joining on security_groups if we're using
    # neutron (which is the default) but if you're using neutron then the
    # security_group_instance_association table should be empty anyway
    # and the DB should optimize out that join, making it insignificant.
    fields = ['metadata', 'info_cache', 'security_groups']
    if expected_attrs:
        fields.extend(expected_attrs)

    if CONF.cells.enable:
        insts = self._do_old_style_instance_list_for_poor_cellsv1_users(
            context, filters, limit, marker, fields, sort_keys,
            sort_dirs)
    else:
        insts = instance_list.get_instance_objects_sorted(
            context, filters, limit, marker, fields, sort_keys, sort_dirs)

    def _get_unique_filter_method():
        # Build a predicate that lets each instance uuid through only once.
        seen_uuids = set()

        def _filter(instance):
            if instance.uuid in seen_uuids:
                return False
            seen_uuids.add(instance.uuid)
            return True

        return _filter

    filter_method = _get_unique_filter_method()
    # Only subtract from limit if it is not None
    limit = (limit - len(insts)) if limit else limit
    # TODO(alaski): Clean up the objects concatenation when List objects
    # support it natively.
    instances = objects.InstanceList(
        objects=list(filter(filter_method,
                            build_req_instances.objects +
                            insts.objects)))

    if filter_ip:
        instances = self._ip_filter(instances, filters, orig_limit)

    return instances
def _do_old_style_instance_list_for_poor_cellsv1_users(self,
                                                       context, filters,
                                                       limit, marker,
                                                       fields,
                                                       sort_keys,
                                                       sort_dirs):
    """List instances from cell0 followed by the top-level database.

    :param context: the request context
    :param filters: DB filters to apply
    :param limit: maximum number of results, or None for no limit
    :param marker: pagination marker; cleared once it is found in cell0
    :param fields: attribute names to load on each instance
    :param sort_keys: list of sort keys
    :param sort_dirs: list of sort directions
    :returns: the cell0 instances concatenated with the remaining
        instances
    """
    try:
        cell0_mapping = objects.CellMapping.get_by_uuid(
            context, objects.CellMapping.CELL0_UUID)
    except exception.CellMappingNotFound:
        cell0_instances = objects.InstanceList(objects=[])
    else:
        with nova_context.target_cell(context, cell0_mapping) as cell0_ctxt:
            try:
                cell0_instances = self._get_instances_by_filters(
                    cell0_ctxt, filters, limit=limit, marker=marker,
                    fields=fields, sort_keys=sort_keys,
                    sort_dirs=sort_dirs)
            except exception.MarkerNotFound:
                # Not an error: the marker still has to be looked for in
                # the cell DB.
                cell0_instances = objects.InstanceList(objects=[])
            else:
                # The marker was found in cell0, so it must not be
                # expected again in the query below.
                marker = None

    # Only a set (non-zero) limit is reduced by what cell0 returned.
    if limit:
        limit = limit - len(cell0_instances)

    # There is only planned support for a single cell here. Multiple cell
    # instance lists should be proxied to project Searchlight, or a similar
    # alternative.
    if limit is not None and limit <= 0:
        LOG.debug('Limit excludes any results from real cells')
        cell_instances = objects.InstanceList(objects=[])
    else:
        # NOTE(melwitt): If we're on cells v1, we need to read
        # instances from the top-level database because reading from
        # cells results in changed behavior, because of the syncing.
        # We can remove this path once we stop supporting cells v1.
        cell_instances = self._get_instances_by_filters(
            context, filters, limit=limit, marker=marker,
            fields=fields, sort_keys=sort_keys,
            sort_dirs=sort_dirs)

    return cell0_instances + cell_instances
@staticmethod
def _ip_filter(inst_models, filters, limit):
    """Filter instances in memory by their fixed IP addresses.

    The 'ip' and 'ip6' entries of ``filters`` are compiled as regular
    expressions and matched (from the start of the string) against the
    version 4 and version 6 fixed IP addresses of each instance.

    :param inst_models: iterable of instances to filter
    :param filters: dict that may hold 'ip' and/or 'ip6' patterns
    :param limit: stop after this many matches; a falsy value means no
        limit
    :returns: an InstanceList with the matching instances
    """
    v4_pattern = re.compile(str(filters.get('ip')))
    v6_pattern = re.compile(str(filters.get('ip6')))

    def _address_matches(fixed_ip):
        address = fixed_ip.get('address')
        if not address:
            return False
        version = fixed_ip.get('version')
        if version == 4:
            return bool(v4_pattern.match(address))
        if version == 6:
            return bool(v6_pattern.match(address))
        return False

    def _instance_matches(instance):
        # True as soon as any fixed IP on any VIF matches.
        return any(_address_matches(fixed_ip)
                   for vif in instance.get_network_info()
                   for fixed_ip in vif.fixed_ips())

    matched = []
    for instance in inst_models:
        if not _instance_matches(instance):
            continue
        matched.append(instance)
        if limit and len(matched) == limit:
            break
    return objects.InstanceList(objects=matched)
def _ip_filter_using_neutron(self, context, filters):
    """Collect the device ids of ports matching the IP filters.

    For each of the 'ip' and 'ip6' filter values that is set, the network
    API is asked for ports whose fixed IP contains that value. Errors
    from the port listing are logged and otherwise ignored.

    :param context: the request context
    :param filters: dict that may hold 'ip' and/or 'ip6' values
    :returns: list of the ``device_id`` values of all ports found
    """
    device_ids = []
    for address in (filters.get('ip'), filters.get('ip6')):
        if not address:
            continue
        try:
            response = self.network_api.list_ports(
                context, fixed_ips='ip_address_substr=' + address,
                fields=['device_id'])
            for port in response['ports']:
                device_ids.append(port['device_id'])
        except Exception as e:
            LOG.error('An error occurred while listing ports '
                      'with an ip_address filter value of "%s". '
                      'Error: %s',
                      address, six.text_type(e))
    return device_ids
def _get_instances_by_filters(self, context, filters,
                              limit=None, marker=None, fields=None,
                              sort_keys=None, sort_dirs=None):
    """Return the InstanceList matching ``filters``.

    Thin wrapper around ``InstanceList.get_by_filters``; ``fields`` is
    passed on as ``expected_attrs``.
    """
    query = {
        'filters': filters,
        'limit': limit,
        'marker': marker,
        'expected_attrs': fields,
        'sort_keys': sort_keys,
        'sort_dirs': sort_dirs,
    }
    return objects.InstanceList.get_by_filters(context, **query)
def update_instance(self, context, instance, updates):
    """Updates a single Instance object with some updates dict.

    The update is written to the instance's cell when the instance has
    already been persisted, and to the BuildRequest otherwise. If the
    BuildRequest has disappeared in the meantime the instance is looked
    up again and updated directly.

    :param context: the request context
    :param instance: the instance to update
    :param updates: dict of attribute values applied via
        ``instance.update``
    :returns: the updated instance; this may be a different object than
        the one passed in when it was reloaded from the BuildRequest or
        the database
    """

    # NOTE(sbauza): Given we only persist the Instance object after we
    # create the BuildRequest, we are sure that if the Instance object
    # has an ID field set, then it was persisted in the right Cell DB.
    if instance.obj_attr_is_set('id'):
        instance.update(updates)
        # Instance has been scheduled and the BuildRequest has been deleted
        # we can directly write the update down to the right cell.
        inst_map = self._get_instance_map_or_none(context, instance.uuid)
        # If we have a cell_mapping and we're not on cells v1, then
        # look up the instance in the cell database
        if inst_map and (inst_map.cell_mapping is not None) and (
                not CONF.cells.enable):
            with nova_context.target_cell(context,
                                          inst_map.cell_mapping) as cctxt:
                # Save through the cell-targeted context.
                with instance.obj_alternate_context(cctxt):
                    instance.save()
        else:
            # If inst_map.cell_mapping does not point at a cell then cell
            # migration has not happened yet.
            # TODO(alaski): Make this a failure case after we put in
            # a block that requires migrating to cellsv2.
            instance.save()
    else:
        # Instance is not yet mapped to a cell, so we need to update
        # BuildRequest instead
        # TODO(sbauza): Fix the possible race conditions where BuildRequest
        # could be deleted because of either a concurrent instance delete
        # or because the scheduler just returned a destination right
        # after we called the instance in the API.
        try:
            build_req = objects.BuildRequest.get_by_instance_uuid(
                context, instance.uuid)
            instance = build_req.instance
            instance.update(updates)
            # FIXME(sbauza): Here we are updating the current
            # thread-related BuildRequest object. Given that another worker
            # could have looking up at that BuildRequest in the API, it
            # means that it could pass it down to the conductor without
            # making sure that it's not updated, we could have some race
            # condition where it would missing the updated fields, but
            # that's something we could discuss once the instance record
            # is persisted by the conductor.
            build_req.save()
        except exception.BuildRequestNotFound:
            # Instance was mapped and the BuildRequest was deleted
            # while fetching (and possibly the instance could have been
            # deleted as well). We need to lookup again the Instance object
            # in order to correctly update it.
            # TODO(sbauza): Figure out a good way to know the expected
            # attributes by checking which fields are set or not.
            expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
                              'tags', 'metadata', 'system_metadata',
                              'security_groups', 'info_cache']
            inst_map = self._get_instance_map_or_none(context,
                                                      instance.uuid)
            if inst_map and (inst_map.cell_mapping is not None):
                with nova_context.target_cell(
                        context,
                        inst_map.cell_mapping) as cctxt:
                    instance = objects.Instance.get_by_uuid(
                        cctxt, instance.uuid,
                        expected_attrs=expected_attrs)
                    instance.update(updates)
                    instance.save()
            else:
                # If inst_map.cell_mapping does not point at a cell then
                # cell migration has not happened yet.
                # TODO(alaski): Make this a failure case after we put in
                # a block that requires migrating to cellsv2.
                instance = objects.Instance.get_by_uuid(
                    context, instance.uuid, expected_attrs=expected_attrs)
                instance.update(updates)
                instance.save()
    return instance
# NOTE(melwitt): We don't check instance lock for backup because lock is
# intended to prevent accidental change/delete of instances
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
                                vm_states.PAUSED, vm_states.SUSPENDED])
def backup(self, context, instance, name, backup_type, rotation,
           extra_properties=None):
    """Backup the given instance

    :param context: the request context
    :param instance: nova.objects.instance.Instance object
    :param name: name of the backup
    :param backup_type: 'daily' or 'weekly'
    :param rotation: int representing how many backups to keep around;
        None if rotation shouldn't be used (as in the case of snapshots)
    :param extra_properties: dict of extra image properties to include
        when creating the image; may be None
    :returns: A dict containing image metadata
    :raises InvalidRequest: if the instance is volume-backed
    """
    # ``extra_properties`` defaults to None, which dict() cannot take as a
    # positional argument, so fall back to an empty mapping. The caller's
    # dict is copied, never modified.
    props_copy = dict(extra_properties or {}, backup_type=backup_type)

    if compute_utils.is_volume_backed_instance(context, instance):
        LOG.info("It's not supported to backup volume backed "
                 "instance.", instance=instance)
        raise exception.InvalidRequest(
            _('Backup is not supported for volume-backed instances.'))

    image_meta = self._create_image(context, instance,
                                    name, 'backup',
                                    extra_properties=props_copy)

    # NOTE(comstud): Any changes to this method should also be made
    # to the backup_instance() method in nova/cells/messaging.py

    instance.task_state = task_states.IMAGE_BACKUP
    instance.save(expected_task_state=[None])

    self._record_action_start(context, instance,
                              instance_actions.BACKUP)

    self.compute_rpcapi.backup_instance(context, instance,
                                        image_meta['id'],
                                        backup_type,
                                        rotation)
    return image_meta
# NOTE(melwitt): We don't check instance lock for snapshot because lock is
# intended to prevent accidental change/delete of instances
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])
def snapshot(self, context, instance, name, extra_properties=None):
    """Snapshot the given instance.

    If saving the new task state fails because the instance is gone or
    is being deleted, the image that was just created is removed on a
    best-effort basis and InstanceInvalidState is raised.

    :param context: security context
    :param instance: nova.objects.instance.Instance object
    :param name: name of the snapshot
    :param extra_properties: dict of extra image properties to include
        when creating the image.
    :returns: A dict containing image metadata
    :raises InstanceInvalidState: if the instance was deleted, or is
        being deleted, before the snapshot could be started
    """
    image_meta = self._create_image(context, instance, name,
                                    'snapshot',
                                    extra_properties=extra_properties)

    # NOTE(comstud): Any changes to this method should also be made
    # to the snapshot_instance() method in nova/cells/messaging.py
    instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
    try:
        instance.save(expected_task_state=[None])
    except (exception.InstanceNotFound,
            exception.UnexpectedDeletingTaskStateError) as ex:
        LOG.debug('Instance disappeared during snapshot.',
                  instance=instance)
        image_id = image_meta['id']
        # Best-effort cleanup of the image created above; a failure
        # here must not mask the InstanceInvalidState raised below.
        try:
            self.image_api.delete(context, image_id)
            LOG.info('Image %s deleted because instance '
                     'deleted before snapshot started.',
                     image_id, instance=instance)
        except exception.ImageNotFound:
            pass
        except Exception as exc:
            LOG.warning("Error while trying to clean up image %(img_id)s: "
                        "%(error_msg)s",
                        {"img_id": image_id,
                         "error_msg": six.text_type(exc)})

        # Choose which attribute/state pair to report in the
        # InstanceInvalidState error. isinstance() is used so that
        # subclasses of InstanceNotFound are classified correctly.
        if isinstance(ex, exception.InstanceNotFound):
            attr = 'vm_state'
            state = vm_states.DELETED
        else:
            attr = 'task_state'
            state = task_states.DELETING
        raise exception.InstanceInvalidState(attr=attr,
                                             instance_uuid=instance.uuid,
                                             state=state,
                                             method='snapshot')

    self._record_action_start(context, instance,
                              instance_actions.CREATE_IMAGE)

    self.compute_rpcapi.snapshot_instance(context, instance,
                                          image_meta['id'])

    return image_meta
def _create_image(self, context, instance, name, image_type,
                  extra_properties=None):
    """Register a new image record with the image service.

    The record is a placeholder that the compute manager later fills
    by uploading a snapshot or backup.

    :param context: security context
    :param instance: nova.objects.instance.Instance object
    :param name: string for name of the snapshot
    :param image_type: snapshot | backup
    :param extra_properties: dict of extra image properties to include
    :returns: whatever the image API returns for the created image
    """
    # Base properties first; caller-supplied ones override them.
    props = dict(instance_uuid=instance.uuid,
                 user_id=str(context.user_id),
                 image_type=image_type)
    if extra_properties:
        props.update(extra_properties)

    meta = self._initialize_instance_snapshot_metadata(
        instance, name, props)

    # For snapshots the disk and container formats are dropped: the image
    # may have been converted to another format, so the original values
    # would not be accurate. The driver will populate these with the
    # correct values later, on image upload.
    if image_type == 'snapshot':
        for format_key in ('disk_format', 'container_format'):
            meta.pop(format_key, None)

    return self.image_api.create(context, meta)
def _initialize_instance_snapshot_metadata(self, instance, name,
                                           extra_properties=None):
    """Build the starting image metadata for a snapshot of an instance.

    The metadata is derived from the instance's system metadata, given
    the requested name, and marked as not public.

    :param instance: nova.objects.instance.Instance object
    :param name: string for name of the snapshot
    :param extra_properties: dict of extra metadata properties to include
    :returns: the new instance snapshot metadata
    """
    meta = utils.get_image_from_system_metadata(instance.system_metadata)
    meta.update(name=name, is_public=False)

    props = meta['properties']

    # Strip the properties configured as non-inheritable.
    for blocked_key in CONF.non_inheritable_image_properties:
        props.pop(blocked_key, None)

    # Caller-supplied properties are applied last so they take precedence.
    if extra_properties:
        props.update(extra_properties)

    return meta
# NOTE(melwitt): We don't check instance lock for snapshot because lock is
# intended to prevent accidental change/delete of instances
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.SUSPENDED])
def snapshot_volume_backed(self, context, instance, name,
extra_properties=None):
"""Snapshot the given volume-backed instance.
:param instance: nova.objects.instance.Instance object
:param name: name of the backup or snapshot
:param extra_properties: dict of extra image properties to include
:returns: the new image metadata
"""
image_meta = self._initialize_instance_snapshot_metadata(
instance, name, extra_properties)
# the new image is simply a bucket of properties (particularly the
# block device mapping, kernel and ramdisk IDs) with no image data,
# hence the zero size
image_meta['size'] = 0
for attr in ('container_format', 'disk_format'):
image_meta.pop(attr, None)
properties = image_meta['properties']
# clean properties before filling
for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
properties.pop(key, None)
if instance.root_device_name:
properties['root_device_name'] = instance.root_device_name
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
mapping = [] # list of BDM dicts that can go into the image properties
# Do some up-front filtering of the list of BDMs from
# which we are going to create snapshots.
volume_bdms = []
for bdm in bdms:
if bdm.no_device:
continue
if bdm.is_volume:
# These will be handled below.
volume_bdms.append(bdm)
else:
mapping.append(bdm.get_image_mapping())
# Check limits in Cinder before creating snapshots to avoid going over
# quota in the middle of a list of volumes. This is a best-effort check
# but concurrently running snapshot requests from the same project
# could still fail to create volume snapshots if they go over limit.
if volume_bdms:
limits = self.volume_api.get_absolute_limits(context)
total_snapshots_used = limits['totalSnapshotsUsed']
max_snapshots = limits['maxTotalSnapshots']
# -1 means there is unlimited quota for snapshots
if (max_snapshots > -1 and
len(volume_bdms) + total_snapshots_used > max_snapshots):
LOG.debug('Unable to create volume snapshots for instance. '
'Currently has %s snapshots, requesting %s new '
'snapshots, with a limit of %s.',
total_snapshots_used, len(volume_bdms),
max_snapshots, instance=instance)
raise exception.OverQuota(overs='snapshots')
quiesced = False
if instance.vm_state == vm_states.ACTIVE: