Count instances to check quota
This changes instances, cores, and ram from ReservableResources to
CountableResources and replaces quota reserve/commit/rollback with
check_deltas accordingly.

All of the reservation- and usage-related unit tests are removed because:

  1. They rely on some global QuotaEngine resources being
     ReservableResources, and every ReservableResource has been removed.
  2. Reservations and usages are no longer in use anywhere in the
     codebase.

Part of blueprint cells-count-resources-to-check-quota-in-api

Change-Id: I9269ffa2b80e48db96c622d0dc0817738854f602
melwitt committed Jul 18, 2017
1 parent 430ec65 commit 5c90b25
Showing 22 changed files with 1,213 additions and 2,098 deletions.
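
To make the counting approach concrete, here is a minimal, self-contained sketch of the pattern the commit message describes: usage is counted on demand and compared against the limits plus the requested deltas, instead of being tracked through reservations with reserve/commit/rollback. The names below (OverQuota, check_deltas, the limits and usage dicts) only mirror Nova's and are simplified stand-ins, not the real Quotas API.

    # Illustrative sketch only; not Nova's actual Quotas API.

    class OverQuota(Exception):
        def __init__(self, overs, quotas, usages):
            super(OverQuota, self).__init__(
                'quota exceeded for: %s' % ', '.join(overs))
            self.overs = overs      # resources that went over their limit
            self.quotas = quotas    # the configured limits
            self.usages = usages    # the counted usage at check time


    def check_deltas(deltas, limits, count_usage):
        """Count current usage, apply the requested deltas, and raise
        OverQuota if any limit would be exceeded. A limit of -1 means
        unlimited."""
        usages = count_usage()
        overs = sorted(res for res, delta in deltas.items()
                       if limits.get(res, -1) != -1
                       and usages.get(res, 0) + delta > limits[res])
        if overs:
            raise OverQuota(overs, limits, usages)


    # Example: a project with room for exactly one more small instance.
    limits = {'instances': 10, 'cores': 20, 'ram': 51200}
    current = {'instances': 9, 'cores': 18, 'ram': 46080}
    check_deltas({'instances': 1, 'cores': 2, 'ram': 2048}, limits,
                 lambda: current)
    # Asking for two instances ({'instances': 2, 'cores': 4, 'ram': 4096})
    # would raise OverQuota on 'instances'.
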
501 changes: 145 additions & 356 deletions nova/compute/api.py

Large diffs are not rendered by default.

286 changes: 80 additions & 206 deletions nova/compute/manager.py

Large diffs are not rendered by default.

126 changes: 126 additions & 0 deletions nova/compute/utils.py
@@ -687,6 +687,132 @@ def reserve_quota_delta(context, deltas, instance):
return quotas


def get_headroom(quotas, usages, deltas):
headroom = {res: quotas[res] - usages[res]
for res in quotas.keys()}
# If quota_cores is unlimited [-1]:
# - set cores headroom based on instances headroom:
if quotas.get('cores') == -1:
if deltas.get('cores'):
hc = headroom.get('instances', 1) * deltas['cores']
headroom['cores'] = hc / deltas.get('instances', 1)
else:
headroom['cores'] = headroom.get('instances', 1)

# If quota_ram is unlimited [-1]:
# - set ram headroom based on instances headroom:
if quotas.get('ram') == -1:
if deltas.get('ram'):
hr = headroom.get('instances', 1) * deltas['ram']
headroom['ram'] = hr / deltas.get('instances', 1)
else:
headroom['ram'] = headroom.get('instances', 1)

return headroom


def check_num_instances_quota(context, instance_type, min_count,
max_count, project_id=None, user_id=None,
orig_num_req=None):
"""Enforce quota limits on number of instances created."""
# project_id is used for the TooManyInstances error message
if project_id is None:
project_id = context.project_id
# Determine requested cores and ram
req_cores = max_count * instance_type.vcpus
req_ram = max_count * instance_type.memory_mb
deltas = {'instances': max_count, 'cores': req_cores, 'ram': req_ram}

try:
objects.Quotas.check_deltas(context, deltas,
project_id, user_id=user_id,
check_project_id=project_id,
check_user_id=user_id)
except exception.OverQuota as exc:
quotas = exc.kwargs['quotas']
overs = exc.kwargs['overs']
usages = exc.kwargs['usages']
# This is for the recheck quota case where we used a delta of zero.
if min_count == max_count == 0:
# orig_num_req is the original number of instances requested in the
# case of a recheck quota, for use in the over quota exception.
req_cores = orig_num_req * instance_type.vcpus
req_ram = orig_num_req * instance_type.memory_mb
requested = {'instances': orig_num_req, 'cores': req_cores,
'ram': req_ram}
(overs, reqs, total_alloweds, useds) = get_over_quota_detail(
deltas, overs, quotas, requested)
msg = "Cannot run any more instances of this type."
params = {'overs': overs, 'pid': project_id, 'msg': msg}
LOG.debug("%(overs)s quota exceeded for %(pid)s. %(msg)s",
params)
raise exception.TooManyInstances(overs=overs,
req=reqs,
used=useds,
allowed=total_alloweds)
# OK, we exceeded quota; let's figure out why...
headroom = get_headroom(quotas, usages, deltas)

allowed = headroom.get('instances', 1)
# Reduce 'allowed' instances in line with the cores & ram headroom
if instance_type.vcpus:
allowed = min(allowed,
headroom['cores'] // instance_type.vcpus)
if instance_type.memory_mb:
allowed = min(allowed,
headroom['ram'] // instance_type.memory_mb)

# Convert to the appropriate exception message
if allowed <= 0:
msg = "Cannot run any more instances of this type."
elif min_count <= allowed <= max_count:
# We're actually OK, but still need to check against allowed
return check_num_instances_quota(context, instance_type, min_count,
allowed, project_id=project_id,
user_id=user_id)
else:
msg = "Can only run %s more instances of this type." % allowed

num_instances = (str(min_count) if min_count == max_count else
"%s-%s" % (min_count, max_count))
requested = dict(instances=num_instances, cores=req_cores,
ram=req_ram)
(overs, reqs, total_alloweds, useds) = get_over_quota_detail(
headroom, overs, quotas, requested)
params = {'overs': overs, 'pid': project_id,
'min_count': min_count, 'max_count': max_count,
'msg': msg}

if min_count == max_count:
LOG.debug("%(overs)s quota exceeded for %(pid)s,"
" tried to run %(min_count)d instances. "
"%(msg)s", params)
else:
LOG.debug("%(overs)s quota exceeded for %(pid)s,"
" tried to run between %(min_count)d and"
" %(max_count)d instances. %(msg)s",
params)
raise exception.TooManyInstances(overs=overs,
req=reqs,
used=useds,
allowed=total_alloweds)

return max_count


def get_over_quota_detail(headroom, overs, quotas, requested):
reqs = []
useds = []
total_alloweds = []
for resource in overs:
reqs.append(str(requested[resource]))
useds.append(str(quotas[resource] - headroom[resource]))
total_alloweds.append(str(quotas[resource]))
(overs, reqs, useds, total_alloweds) = map(', '.join, (
overs, reqs, useds, total_alloweds))
return overs, reqs, total_alloweds, useds


def remove_shelved_keys_from_system_metadata(instance):
# Delete system_metadata for a shelved instance
for key in ['shelved_at', 'shelved_image_id', 'shelved_host']:
62 changes: 56 additions & 6 deletions nova/conductor/manager.py
@@ -925,15 +925,11 @@ def schedule_and_build_instances(self, context, build_requests,
return

host_mapping_cache = {}
instances = []

for (build_request, request_spec, host) in six.moves.zip(
build_requests, request_specs, hosts):
filter_props = request_spec.to_legacy_filter_properties_dict()
instance = build_request.get_new_instance(context)
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
host)

# Convert host from the scheduler into a cell record
if host['host'] not in host_mapping_cache:
try:
@@ -947,6 +943,8 @@ def schedule_and_build_instances(self, context, build_requests,
self._bury_in_cell0(context, request_spec, exc,
build_requests=[build_request],
instances=[instance])
# This is a placeholder in case the quota recheck fails.
instances.append(None)
continue
else:
host_mapping = host_mapping_cache[host['host']]
@@ -963,14 +961,43 @@ def schedule_and_build_instances(self, context, build_requests,
# the build request is gone so we're done for this instance
LOG.debug('While scheduling instance, the build request '
'was already deleted.', instance=instance)
# This is a placeholder in case the quota recheck fails.
instances.append(None)
continue
else:
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host['host']))
with obj_target_cell(instance, cell):
instance.create()

instances.append(instance)

# NOTE(melwitt): We recheck the quota after creating the
# objects to prevent users from allocating more resources
# than their allowed quota in the event of a race. This is
# configurable because it can be expensive if strict quota
# limits are not required in a deployment.
if CONF.quota.recheck_quota:
try:
compute_utils.check_num_instances_quota(
context, instance.flavor, 0, 0,
orig_num_req=len(build_requests))
except exception.TooManyInstances as exc:
with excutils.save_and_reraise_exception():
self._cleanup_build_artifacts(context, exc, instances,
build_requests,
request_specs)

for (build_request, request_spec, host, instance) in six.moves.zip(
build_requests, request_specs, hosts, instances):
if instance is None:
# Skip placeholders that were buried in cell0 or had their
# build requests deleted by the user before instance create.
continue
filter_props = request_spec.to_legacy_filter_properties_dict()
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
host)
# send a state update notification for the initial create to
# show it going from non-existent to BUILDING
notifications.send_update_with_states(context, instance, None,
@@ -1019,6 +1046,29 @@ def schedule_and_build_instances(self, context, build_requests,
host=host['host'], node=host['nodename'],
limits=host['limits'])

def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
request_specs):
for (instance, build_request, request_spec) in six.moves.zip(
instances, build_requests, request_specs):
# Skip placeholders that were buried in cell0 or had their
# build requests deleted by the user before instance create.
if instance is None:
continue
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
legacy_spec = request_spec.to_legacy_request_spec_dict()
self._set_vm_state_and_notify(context, instance.uuid,
'build_instances', updates, exc,
legacy_spec)
# Be paranoid about artifacts being deleted underneath us.
try:
build_request.destroy()
except exception.BuildRequestNotFound:
pass
try:
request_spec.destroy()
except exception.RequestSpecNotFound:
pass

def _delete_build_request(self, context, build_request, instance, cell,
instance_bdms, instance_tags):
"""Delete a build request after creating the instance in the cell.
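The NOTE added to schedule_and_build_instances above explains the recheck: after the instance records are created, the quota is checked again with a delta of zero so that two racing requests cannot both slip under the limit, and the build artifacts are cleaned up if the recheck fails. A minimal, self-contained sketch of that create-then-recheck flow follows; schedule_and_build, count_usage, and the in-memory instances list are illustrative stand-ins, and where Nova marks the over-quota instances as ERROR, this sketch simply removes the new rows to stay short.

    # Illustrative sketch of create-then-recheck; not Nova's actual objects.

    class TooManyInstances(Exception):
        pass


    def count_usage(instances):
        """Count usage directly from the stored instance rows."""
        return {'instances': len(instances),
                'cores': sum(inst['cores'] for inst in instances),
                'ram': sum(inst['ram'] for inst in instances)}


    def schedule_and_build(requests, limits, instances, recheck_quota=True):
        created = []
        for req in requests:
            inst = {'cores': req['cores'], 'ram': req['ram']}
            instances.append(inst)  # once the row exists it counts as usage
            created.append(inst)

        if recheck_quota:
            # Recheck with a delta of zero: just compare counted usage to
            # the limits, catching a racing request that also passed the
            # first check.
            usage = count_usage(instances)
            overs = [res for res, limit in limits.items()
                     if limit != -1 and usage[res] > limit]
            if overs:
                for inst in created:    # roll this request's rows back
                    instances.remove(inst)
                raise TooManyInstances(
                    'quota exceeded for: %s' % ', '.join(overs))
        return created

This is also why the conductor appends a None placeholder to the instances list for requests that were buried in cell0 or whose build request was deleted: the recheck cleanup and the later notification loop walk the build_requests, request_specs, and instances lists in lockstep.
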
7 changes: 1 addition & 6 deletions nova/conductor/tasks/migrate.py
@@ -30,15 +30,11 @@ def __init__(self, context, instance, flavor,
self.request_spec = request_spec
self.reservations = reservations
self.flavor = flavor
self.quotas = None

self.compute_rpcapi = compute_rpcapi
self.scheduler_client = scheduler_client

def _execute(self):
self.quotas = objects.Quotas.from_reservations(self.context,
self.reservations,
instance=self.instance)
# TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
# object in the signature and all the scheduler.utils methods too
legacy_spec = self.request_spec.to_legacy_request_spec_dict()
@@ -96,5 +92,4 @@ def _execute(self):
node=node, clean_shutdown=self.clean_shutdown)

def rollback(self):
if self.quotas:
self.quotas.rollback()
pass
55 changes: 54 additions & 1 deletion nova/objects/instance.py
@@ -20,7 +20,10 @@
from oslo_serialization import jsonutils
from oslo_utils import timeutils
from oslo_utils import versionutils
from sqlalchemy import or_
from sqlalchemy.orm import joinedload
from sqlalchemy.sql import func
from sqlalchemy.sql import null

from nova.cells import opts as cells_opts
from nova.cells import rpcapi as cells_rpcapi
@@ -1206,7 +1209,8 @@ class InstanceList(base.ObjectListBase, base.NovaObject):
# Version 2.1: Add get_uuids_by_host()
# Version 2.2: Pagination for get_active_by_window_joined()
# Version 2.3: Add get_count_by_vm_state()
VERSION = '2.3'
# Version 2.4: Add get_counts()
VERSION = '2.4'

fields = {
'objects': fields.ListOfObjectsField('Instance'),
@@ -1407,6 +1411,55 @@ def get_count_by_vm_state(cls, context, project_id, user_id, vm_state):
return cls._get_count_by_vm_state_in_db(context, project_id, user_id,
vm_state)

@staticmethod
@db_api.pick_context_manager_reader
def _get_counts_in_db(context, project_id, user_id=None):
# NOTE(melwitt): Copied from nova/db/sqlalchemy/api.py:
# It would be better to have vm_state not be nullable
# but until then we test it explicitly as a workaround.
not_soft_deleted = or_(
models.Instance.vm_state != vm_states.SOFT_DELETED,
models.Instance.vm_state == null()
)
project_query = context.session.query(
func.count(models.Instance.id),
func.sum(models.Instance.vcpus),
func.sum(models.Instance.memory_mb)).\
filter_by(deleted=0).\
filter(not_soft_deleted).\
filter_by(project_id=project_id)

project_result = project_query.first()
fields = ('instances', 'cores', 'ram')
project_counts = {field: int(project_result[idx] or 0)
for idx, field in enumerate(fields)}
counts = {'project': project_counts}
if user_id:
user_result = project_query.filter_by(user_id=user_id).first()
user_counts = {field: int(user_result[idx] or 0)
for idx, field in enumerate(fields)}
counts['user'] = user_counts
return counts

@base.remotable_classmethod
def get_counts(cls, context, project_id, user_id=None):
"""Get the counts of Instance objects in the database.
:param context: The request context for database access
:param project_id: The project_id to count across
:param user_id: The user_id to count across
:returns: A dict containing the project-scoped counts and user-scoped
counts if user_id is specified. For example:
                    {'project': {'instances': <count across project>,
                                 'cores': <count across project>,
                                 'ram': <count across project>},
                     'user': {'instances': <count across user>,
                              'cores': <count across user>,
                              'ram': <count across user>}}
"""
return cls._get_counts_in_db(context, project_id, user_id=user_id)


@db_api.pick_context_manager_writer
def _migrate_instance_keypairs(ctxt, count):
