Count instances to check quota
This changes instances, cores, and ram from ReservableResources to
CountableResources and replaces quota reserve/commit/rollback with
check_deltas accordingly.

All of the reservation- and usage-related unit tests are removed because:

  1. They rely on some global QuotaEngine resources being
     ReservableResources, and every ReservableResource has been removed.
  2. Reservations and usages are no longer in use anywhere in the
     codebase.

Part of blueprint cells-count-resources-to-check-quota-in-api

Change-Id: I9269ffa2b80e48db96c622d0dc0817738854f602
melwitt committed Jul 18, 2017
1 parent 430ec65 commit 5c90b25
Showing 22 changed files with 1,213 additions and 2,098 deletions.
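
To make the counting approach concrete, here is a minimal, self-contained sketch of the pattern the commit message describes: usage is counted on demand and compared against the limits plus the requested deltas, instead of being tracked through reservations with reserve/commit/rollback. The names below (OverQuota, check_deltas, the limits and usage dicts) only mirror Nova's and are simplified stand-ins, not the real Quotas API.

    # Illustrative sketch only; not Nova's actual Quotas API.

    class OverQuota(Exception):
        def __init__(self, overs, quotas, usages):
            super(OverQuota, self).__init__(
                'quota exceeded for: %s' % ', '.join(overs))
            self.overs = overs      # resources that went over their limit
            self.quotas = quotas    # the configured limits
            self.usages = usages    # the counted usage at check time


    def check_deltas(deltas, limits, count_usage):
        """Count current usage, apply the requested deltas, and raise
        OverQuota if any limit would be exceeded. A limit of -1 means
        unlimited."""
        usages = count_usage()
        overs = sorted(res for res, delta in deltas.items()
                       if limits.get(res, -1) != -1
                       and usages.get(res, 0) + delta > limits[res])
        if overs:
            raise OverQuota(overs, limits, usages)


    # Example: a project with room for exactly one more small instance.
    limits = {'instances': 10, 'cores': 20, 'ram': 51200}
    current = {'instances': 9, 'cores': 18, 'ram': 46080}
    check_deltas({'instances': 1, 'cores': 2, 'ram': 2048}, limits,
                 lambda: current)
    # Asking for two instances ({'instances': 2, 'cores': 4, 'ram': 4096})
    # would raise OverQuota on 'instances'.
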
501 changes: 145 additions & 356 deletions nova/compute/api.py

Large diffs are not rendered by default.

286 changes: 80 additions & 206 deletions nova/compute/manager.py

Large diffs are not rendered by default.

126 changes: 126 additions & 0 deletions nova/compute/utils.py
@@ -687,6 +687,132 @@ def reserve_quota_delta(context, deltas, instance):
return quotas


def get_headroom(quotas, usages, deltas):
headroom = {res: quotas[res] - usages[res]
for res in quotas.keys()}
# If quota_cores is unlimited [-1]:
# - set cores headroom based on instances headroom:
if quotas.get('cores') == -1:
if deltas.get('cores'):
hc = headroom.get('instances', 1) * deltas['cores']
headroom['cores'] = hc / deltas.get('instances', 1)
else:
headroom['cores'] = headroom.get('instances', 1)

# If quota_ram is unlimited [-1]:
# - set ram headroom based on instances headroom:
if quotas.get('ram') == -1:
if deltas.get('ram'):
hr = headroom.get('instances', 1) * deltas['ram']
headroom['ram'] = hr / deltas.get('instances', 1)
else:
headroom['ram'] = headroom.get('instances', 1)

return headroom


def check_num_instances_quota(context, instance_type, min_count,
max_count, project_id=None, user_id=None,
orig_num_req=None):
"""Enforce quota limits on number of instances created."""
# project_id is used for the TooManyInstances error message
if project_id is None:
project_id = context.project_id
# Determine requested cores and ram
req_cores = max_count * instance_type.vcpus
req_ram = max_count * instance_type.memory_mb
deltas = {'instances': max_count, 'cores': req_cores, 'ram': req_ram}

try:
objects.Quotas.check_deltas(context, deltas,
project_id, user_id=user_id,
check_project_id=project_id,
check_user_id=user_id)
except exception.OverQuota as exc:
quotas = exc.kwargs['quotas']
overs = exc.kwargs['overs']
usages = exc.kwargs['usages']
# This is for the recheck quota case where we used a delta of zero.
if min_count == max_count == 0:
# orig_num_req is the original number of instances requested in the
# case of a recheck quota, for use in the over quota exception.
req_cores = orig_num_req * instance_type.vcpus
req_ram = orig_num_req * instance_type.memory_mb
requested = {'instances': orig_num_req, 'cores': req_cores,
'ram': req_ram}
(overs, reqs, total_alloweds, useds) = get_over_quota_detail(
deltas, overs, quotas, requested)
msg = "Cannot run any more instances of this type."
params = {'overs': overs, 'pid': project_id, 'msg': msg}
LOG.debug("%(overs)s quota exceeded for %(pid)s. %(msg)s",
params)
raise exception.TooManyInstances(overs=overs,
req=reqs,
used=useds,
allowed=total_alloweds)
# OK, we exceeded quota; let's figure out why...
headroom = get_headroom(quotas, usages, deltas)

allowed = headroom.get('instances', 1)
# Reduce 'allowed' instances in line with the cores & ram headroom
if instance_type.vcpus:
allowed = min(allowed,
headroom['cores'] // instance_type.vcpus)
if instance_type.memory_mb:
allowed = min(allowed,
headroom['ram'] // instance_type.memory_mb)

# Convert to the appropriate exception message
if allowed <= 0:
msg = "Cannot run any more instances of this type."
elif min_count <= allowed <= max_count:
# We're actually OK, but still need to check against allowed
return check_num_instances_quota(context, instance_type, min_count,
allowed, project_id=project_id,
user_id=user_id)
else:
msg = "Can only run %s more instances of this type." % allowed

num_instances = (str(min_count) if min_count == max_count else
"%s-%s" % (min_count, max_count))
requested = dict(instances=num_instances, cores=req_cores,
ram=req_ram)
(overs, reqs, total_alloweds, useds) = get_over_quota_detail(
headroom, overs, quotas, requested)
params = {'overs': overs, 'pid': project_id,
'min_count': min_count, 'max_count': max_count,
'msg': msg}

if min_count == max_count:
LOG.debug("%(overs)s quota exceeded for %(pid)s,"
" tried to run %(min_count)d instances. "
"%(msg)s", params)
else:
LOG.debug("%(overs)s quota exceeded for %(pid)s,"
" tried to run between %(min_count)d and"
" %(max_count)d instances. %(msg)s",
params)
raise exception.TooManyInstances(overs=overs,
req=reqs,
used=useds,
allowed=total_alloweds)

return max_count


def get_over_quota_detail(headroom, overs, quotas, requested):
reqs = []
useds = []
total_alloweds = []
for resource in overs:
reqs.append(str(requested[resource]))
useds.append(str(quotas[resource] - headroom[resource]))
total_alloweds.append(str(quotas[resource]))
(overs, reqs, useds, total_alloweds) = map(', '.join, (
overs, reqs, useds, total_alloweds))
return overs, reqs, total_alloweds, useds


def remove_shelved_keys_from_system_metadata(instance):
# Delete system_metadata for a shelved instance
for key in ['shelved_at', 'shelved_image_id', 'shelved_host']:
62 changes: 56 additions & 6 deletions nova/conductor/manager.py
@@ -925,15 +925,11 @@ def schedule_and_build_instances(self, context, build_requests,
return

host_mapping_cache = {}
instances = []

for (build_request, request_spec, host) in six.moves.zip(
build_requests, request_specs, hosts):
filter_props = request_spec.to_legacy_filter_properties_dict()
instance = build_request.get_new_instance(context)
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
host)

# Convert host from the scheduler into a cell record
if host['host'] not in host_mapping_cache:
try:
@@ -947,6 +943,8 @@ def schedule_and_build_instances(self, context, build_requests,
self._bury_in_cell0(context, request_spec, exc,
build_requests=[build_request],
instances=[instance])
# This is a placeholder in case the quota recheck fails.
instances.append(None)
continue
else:
host_mapping = host_mapping_cache[host['host']]
@@ -963,14 +961,43 @@ def schedule_and_build_instances(self, context, build_requests,
# the build request is gone so we're done for this instance
LOG.debug('While scheduling instance, the build request '
'was already deleted.', instance=instance)
# This is a placeholder in case the quota recheck fails.
instances.append(None)
continue
else:
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host['host']))
with obj_target_cell(instance, cell):
instance.create()

instances.append(instance)

# NOTE(melwitt): We recheck the quota after creating the
# objects to prevent users from allocating more resources
# than their allowed quota in the event of a race. This is
# configurable because it can be expensive if strict quota
# limits are not required in a deployment.
if CONF.quota.recheck_quota:
try:
compute_utils.check_num_instances_quota(
context, instance.flavor, 0, 0,
orig_num_req=len(build_requests))
except exception.TooManyInstances as exc:
with excutils.save_and_reraise_exception():
self._cleanup_build_artifacts(context, exc, instances,
build_requests,
request_specs)

for (build_request, request_spec, host, instance) in six.moves.zip(
build_requests, request_specs, hosts, instances):
if instance is None:
# Skip placeholders that were buried in cell0 or had their
# build requests deleted by the user before instance create.
continue
filter_props = request_spec.to_legacy_filter_properties_dict()
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
host)
# send a state update notification for the initial create to
# show it going from non-existent to BUILDING
notifications.send_update_with_states(context, instance, None,
@@ -1019,6 +1046,29 @@ def schedule_and_build_instances(self, context, build_requests,
host=host['host'], node=host['nodename'],
limits=host['limits'])

def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
request_specs):
for (instance, build_request, request_spec) in six.moves.zip(
instances, build_requests, request_specs):
# Skip placeholders that were buried in cell0 or had their
# build requests deleted by the user before instance create.
if instance is None:
continue
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
legacy_spec = request_spec.to_legacy_request_spec_dict()
self._set_vm_state_and_notify(context, instance.uuid,
'build_instances', updates, exc,
legacy_spec)
# Be paranoid about artifacts being deleted underneath us.
try:
build_request.destroy()
except exception.BuildRequestNotFound:
pass
try:
request_spec.destroy()
except exception.RequestSpecNotFound:
pass

def _delete_build_request(self, context, build_request, instance, cell,
instance_bdms, instance_tags):
"""Delete a build request after creating the instance in the cell.
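The NOTE added to schedule_and_build_instances above explains the recheck: after the instance records are created, the quota is checked again with a delta of zero so that two racing requests cannot both slip under the limit, and the build artifacts are cleaned up if the recheck fails. A minimal, self-contained sketch of that create-then-recheck flow follows; schedule_and_build, count_usage, and the in-memory instances list are illustrative stand-ins, and where Nova marks the over-quota instances as ERROR, this sketch simply removes the new rows to stay short.

    # Illustrative sketch of create-then-recheck; not Nova's actual objects.

    class TooManyInstances(Exception):
        pass


    def count_usage(instances):
        """Count usage directly from the stored instance rows."""
        return {'instances': len(instances),
                'cores': sum(inst['cores'] for inst in instances),
                'ram': sum(inst['ram'] for inst in instances)}


    def schedule_and_build(requests, limits, instances, recheck_quota=True):
        created = []
        for req in requests:
            inst = {'cores': req['cores'], 'ram': req['ram']}
            instances.append(inst)  # once the row exists it counts as usage
            created.append(inst)

        if recheck_quota:
            # Recheck with a delta of zero: just compare counted usage to
            # the limits, catching a racing request that also passed the
            # first check.
            usage = count_usage(instances)
            overs = [res for res, limit in limits.items()
                     if limit != -1 and usage[res] > limit]
            if overs:
                for inst in created:    # roll this request's rows back
                    instances.remove(inst)
                raise TooManyInstances(
                    'quota exceeded for: %s' % ', '.join(overs))
        return created

This is also why the conductor appends a None placeholder to the instances list for requests that were buried in cell0 or whose build request was deleted: the recheck cleanup and the later notification loop walk the build_requests, request_specs, and instances lists in lockstep.
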
7 changes: 1 addition & 6 deletions nova/conductor/tasks/migrate.py
@@ -30,15 +30,11 @@ def __init__(self, context, instance, flavor,
self.request_spec = request_spec
self.reservations = reservations
self.flavor = flavor
self.quotas = None

self.compute_rpcapi = compute_rpcapi
self.scheduler_client = scheduler_client

def _execute(self):
self.quotas = objects.Quotas.from_reservations(self.context,
self.reservations,
instance=self.instance)
# TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
# object in the signature and all the scheduler.utils methods too
legacy_spec = self.request_spec.to_legacy_request_spec_dict()
@@ -96,5 +92,4 @@ def _execute(self):
node=node, clean_shutdown=self.clean_shutdown)

def rollback(self):
if self.quotas:
self.quotas.rollback()
pass
55 changes: 54 additions & 1 deletion nova/objects/instance.py
@@ -20,7 +20,10 @@
from oslo_serialization import jsonutils
from oslo_utils import timeutils
from oslo_utils import versionutils
from sqlalchemy import or_
from sqlalchemy.orm import joinedload
from sqlalchemy.sql import func
from sqlalchemy.sql import null

from nova.cells import opts as cells_opts
from nova.cells import rpcapi as cells_rpcapi
@@ -1206,7 +1209,8 @@ class InstanceList(base.ObjectListBase, base.NovaObject):
# Version 2.1: Add get_uuids_by_host()
# Version 2.2: Pagination for get_active_by_window_joined()
# Version 2.3: Add get_count_by_vm_state()
VERSION = '2.3'
# Version 2.4: Add get_counts()
VERSION = '2.4'

fields = {
'objects': fields.ListOfObjectsField('Instance'),
@@ -1407,6 +1411,55 @@ def get_count_by_vm_state(cls, context, project_id, user_id, vm_state):
return cls._get_count_by_vm_state_in_db(context, project_id, user_id,
vm_state)

@staticmethod
@db_api.pick_context_manager_reader
def _get_counts_in_db(context, project_id, user_id=None):
# NOTE(melwitt): Copied from nova/db/sqlalchemy/api.py:
# It would be better to have vm_state not be nullable
# but until then we test it explicitly as a workaround.
not_soft_deleted = or_(
models.Instance.vm_state != vm_states.SOFT_DELETED,
models.Instance.vm_state == null()
)
project_query = context.session.query(
func.count(models.Instance.id),
func.sum(models.Instance.vcpus),
func.sum(models.Instance.memory_mb)).\
filter_by(deleted=0).\
filter(not_soft_deleted).\
filter_by(project_id=project_id)

project_result = project_query.first()
fields = ('instances', 'cores', 'ram')
project_counts = {field: int(project_result[idx] or 0)
for idx, field in enumerate(fields)}
counts = {'project': project_counts}
if user_id:
user_result = project_query.filter_by(user_id=user_id).first()
user_counts = {field: int(user_result[idx] or 0)
for idx, field in enumerate(fields)}
counts['user'] = user_counts
return counts

@base.remotable_classmethod
def get_counts(cls, context, project_id, user_id=None):
"""Get the counts of Instance objects in the database.
:param context: The request context for database access
:param project_id: The project_id to count across
:param user_id: The user_id to count across
:returns: A dict containing the project-scoped counts and user-scoped
counts if user_id is specified. For example:
                    {'project': {'instances': <count across project>,
                                 'cores': <count across project>,
                                 'ram': <count across project>},
                     'user': {'instances': <count across user>,
                              'cores': <count across user>,
                              'ram': <count across user>}}
"""
return cls._get_counts_in_db(context, project_id, user_id=user_id)


@db_api.pick_context_manager_writer
def _migrate_instance_keypairs(ctxt, count):
