Make get_allocations_for_resource_provider raise
In preparation for reshaper work, and because it's The Right Thing To
Do, this patch makes the report client's
get_allocations_for_resource_provider method stop pretending nothing
went wrong when its placement API request fails. We remove the
@safe_connect decorator so the method raises (a subclass of)
keystoneauth1's ClientException when API communication itself fails,
and we make it raise a new ResourceProviderAllocationRetrievalFailed
exception on any non-200 response.

In the spirit of the aggregate and trait retrieval methods, it now
returns a ProviderAllocInfo namedtuple containing the allocation
information.

Unit tests, which were entirely absent, are added for the method.

The resource tracker's _remove_deleted_instances_allocations, which is
get_allocations_for_resource_provider's only consumer (for now, until
the reshaper work starts using it), is refactored to preserve its
previous behavior: it no-ops if the placement API request fails.
However, (a) the method now short-circuits earlier, saving a little
work copying context state; and (b) it now emits an error log message
when the no-op is due to one of the newly raised exceptions.

Change-Id: I020e7dc47efc79f8907b7bfb753ec779a8da69a1
Eric Fried committed Aug 23, 2018
1 parent d13e5a5 commit f534495
Showing 5 changed files with 115 additions and 25 deletions.
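Before the diffs, a quick consumer-side sketch of the new contract. This is illustrative only — reportclient, context, and rp_uuid are hypothetical stand-ins, and the `...` bodies are placeholders; the real consumer is in the resource tracker diff below:

    from keystoneauth1 import exceptions as ks_exc

    from nova import exception

    try:
        # Raises instead of silently returning {} on failure.
        pai = reportclient.get_allocations_for_resource_provider(
            context, rp_uuid)
    except ks_exc.ClientException:
        # keystoneauth could not communicate with the placement API at all.
        ...
    except exception.ResourceProviderAllocationRetrievalFailed:
        # Placement responded, but with a non-200 status.
        ...
    else:
        allocations = pai.allocations  # dict keyed by consumer UUID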
17 changes: 15 additions & 2 deletions nova/compute/resource_tracker.py
@@ -22,6 +22,7 @@
 import copy
 import retrying
 
+from keystoneauth1 import exceptions as ks_exc
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
 
@@ -1271,8 +1272,20 @@ def _remove_deleted_instances_allocations(self, context, cn,
         # happen according to the normal flow of events where the scheduler
         # always creates allocations for an instance
         known_instances = set(self.tracked_instances.keys())
-        allocations = self.reportclient.get_allocations_for_resource_provider(
-            context, cn.uuid) or {}
+        try:
+            # pai: report.ProviderAllocInfo namedtuple
+            pai = self.reportclient.get_allocations_for_resource_provider(
+                context, cn.uuid)
+        except (exception.ResourceProviderAllocationRetrievalFailed,
+                ks_exc.ClientException) as e:
+            LOG.error("Skipping removal of allocations for deleted instances: "
+                      "%s", e)
+            return
+        allocations = pai.allocations
+        if not allocations:
+            # The main loop below would short-circuit anyway, but this saves us
+            # the (potentially expensive) context.elevated construction below.
+            return
         read_deleted_context = context.elevated(read_deleted='yes')
         for consumer_uuid, alloc in allocations.items():
             if consumer_uuid in known_instances:
5 changes: 5 additions & 0 deletions nova/exception.py
@@ -2368,3 +2368,8 @@ def __init__(self, message=None, **kwargs):
 
 class NoResourceClass(NovaException):
     msg_fmt = _("Resource class not found for Ironic node %(node)s.")
+
+
+class ResourceProviderAllocationRetrievalFailed(NovaException):
+    msg_fmt = _("Failed to retrieve allocations for resource provider "
+                "%(rp_uuid)s: %(error)s")
21 changes: 17 additions & 4 deletions nova/scheduler/client/report.py
@@ -57,6 +57,8 @@
 
 AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation'])
 TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation'])
+ProviderAllocInfo = collections.namedtuple(
+    'ProviderAllocInfo', ['allocations'])
 
 
 def warn_limit(self, msg):
@@ -1933,14 +1935,25 @@ def update_instance_allocation(self, context, compute_node, instance,
         else:
             self.delete_allocation_for_instance(context, instance.uuid)
 
-    @safe_connect
     def get_allocations_for_resource_provider(self, context, rp_uuid):
+        """Retrieves the allocations for a specific provider.
+
+        :param context: The nova.context.RequestContext auth context
+        :param rp_uuid: The UUID of the provider.
+        :return: ProviderAllocInfo namedtuple.
+        :raises: keystoneauth1.exceptions.base.ClientException on failure to
+                 communicate with the placement API
+        :raises: ResourceProviderAllocationRetrievalFailed if the placement API
+                 call fails.
+        """
         url = '/resource_providers/%s/allocations' % rp_uuid
         resp = self.get(url, global_request_id=context.global_id)
         if not resp:
-            return {}
-        else:
-            return resp.json()['allocations']
+            raise exception.ResourceProviderAllocationRetrievalFailed(
+                rp_uuid=rp_uuid, error=resp.text)
+
+        data = resp.json()
+        return ProviderAllocInfo(allocations=data['allocations'])
 
     def delete_resource_provider(self, context, compute_node, cascade=False):
         """Deletes the ResourceProvider record for the compute_node.
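A note on the `if not resp` check in the new method body: self.get returns a requests-style Response, and requests makes a Response falsey for any 4xx/5xx status, so the truthiness test is what routes non-200 responses into the raise. A standalone illustration (status codes hypothetical):

    import requests

    resp = requests.Response()
    resp.status_code = 200
    assert resp       # 2xx is truthy: fall through and parse the JSON body

    resp.status_code = 400
    assert not resp   # 4xx/5xx is falsey: the method now raises, not returns {}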
75 changes: 56 additions & 19 deletions nova/tests/unit/compute/test_resource_tracker.py
@@ -13,6 +13,7 @@
 import copy
 import datetime
 
+from keystoneauth1 import exceptions as ks_exc
 import mock
 from oslo_config import cfg
 from oslo_utils import timeutils
@@ -33,6 +34,7 @@
 from nova.objects import pci_device
 from nova.pci import manager as pci_manager
 from nova import rc_fields
+from nova.scheduler.client import report
 from nova import test
 from nova.tests.unit import fake_notifier
 from nova.tests.unit.objects import test_pci_device as fake_pci_device
@@ -2799,7 +2801,8 @@ def test_remove_deleted_instances_allocations_deleted_instance(self,
                                                                    mock_inst_get):
         rc = self.rt.reportclient
         self.rt.tracked_instances = {}
-        allocs = {uuids.deleted: "fake_deleted_instance"}
+        allocs = report.ProviderAllocInfo(
+            allocations={uuids.deleted: "fake_deleted_instance"})
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
         rc.delete_allocation_for_instance = mock.MagicMock()
@@ -2824,7 +2827,8 @@ def test_remove_deleted_instances_allocations_building_instance(self,
                                                                     mock_inst_get):
         rc = self.rt.reportclient
         self.rt.tracked_instances = {}
-        allocs = {uuids.deleted: "fake_deleted_instance"}
+        allocs = report.ProviderAllocInfo(
+            allocations={uuids.deleted: "fake_deleted_instance"})
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
         rc.delete_allocation_for_instance = mock.MagicMock()
@@ -2843,8 +2847,9 @@ def test_remove_deleted_instances_allocations_ignores_migrations(self,
                                                                      mock_inst_get):
         rc = self.rt.reportclient
         self.rt.tracked_instances = {}
-        allocs = {uuids.deleted: "fake_deleted_instance",
-                  uuids.migration: "fake_migration"}
+        allocs = report.ProviderAllocInfo(
+            allocations={uuids.deleted: "fake_deleted_instance",
+                         uuids.migration: "fake_migration"})
         mig = objects.Migration(uuid=uuids.migration)
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
@@ -2867,7 +2872,8 @@ def test_remove_deleted_instances_allocations_scheduled_instance(self,
                                                                     mock_inst_get):
         rc = self.rt.reportclient
         self.rt.tracked_instances = {}
-        allocs = {uuids.scheduled: "fake_scheduled_instance"}
+        allocs = report.ProviderAllocInfo(
+            allocations={uuids.scheduled: "fake_scheduled_instance"})
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
         rc.delete_allocation_for_instance = mock.MagicMock()
@@ -2916,15 +2922,17 @@ def test_remove_deleted_instances_allocations_known_instance(self):
         rc = self.rt.reportclient
         self.rt.tracked_instances = {
             uuids.known: objects.Instance(uuid=uuids.known)}
-        allocs = {
-            uuids.known: {
-                'resources': {
-                    'VCPU': 1,
-                    'MEMORY_MB': 2048,
-                    'DISK_GB': 20
+        allocs = report.ProviderAllocInfo(
+            allocations={
+                uuids.known: {
+                    'resources': {
+                        'VCPU': 1,
+                        'MEMORY_MB': 2048,
+                        'DISK_GB': 20
+                    }
                 }
             }
-        }
+        )
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
         rc.delete_allocation_for_instance = mock.MagicMock()
@@ -2951,15 +2959,17 @@ def test_remove_deleted_instances_allocations_unknown_instance(
         # No tracked instances on this node.
         self.rt.tracked_instances = {}
         # But there is an allocation for an instance on this node.
-        allocs = {
-            instance.uuid: {
-                'resources': {
-                    'VCPU': 1,
-                    'MEMORY_MB': 2048,
-                    'DISK_GB': 20
+        allocs = report.ProviderAllocInfo(
+            allocations={
+                instance.uuid: {
+                    'resources': {
+                        'VCPU': 1,
+                        'MEMORY_MB': 2048,
+                        'DISK_GB': 20
+                    }
                 }
             }
-        }
+        )
         rc.get_allocations_for_resource_provider = mock.MagicMock(
             return_value=allocs)
         rc.delete_allocation_for_instance = mock.MagicMock()
@@ -2974,6 +2984,33 @@ def test_remove_deleted_instances_allocations_unknown_instance(
         # and figure things out, this should actually be an error.
         rc.delete_allocation_for_instance.assert_not_called()
 
+    def test_remove_deleted_instances_allocations_retrieval_fail(self):
+        """When the report client errs or otherwise retrieves no allocations,
+        _remove_deleted_instances_allocations gracefully no-ops.
+        """
+        cn = self.rt.compute_nodes[_NODENAME]
+        rc = self.rt.reportclient
+        # We'll test three different ways get_allocations_for_resource_provider
+        # can cause us to no-op.
+        side_effects = (
+            # Actual placement error
+            exc.ResourceProviderAllocationRetrievalFailed(
+                rp_uuid='rp_uuid', error='error'),
+            # API communication failure
+            ks_exc.ClientException,
+            # Legitimately no allocations
+            report.ProviderAllocInfo(allocations={}),
+        )
+        rc.get_allocations_for_resource_provider = mock.Mock(
+            side_effect=side_effects)
+        for _ in side_effects:
+            # If we didn't no op, this would blow up at 'ctx'.elevated()
+            self.rt._remove_deleted_instances_allocations(
+                'ctx', cn, [], self.rt.tracked_instances)
+            rc.get_allocations_for_resource_provider.assert_called_once_with(
+                'ctx', cn.uuid)
+            rc.get_allocations_for_resource_provider.reset_mock()
+
     def test_delete_allocation_for_shelve_offloaded_instance(self):
         instance = _INSTANCE_FIXTURES[0].obj_clone()
         instance.uuid = uuids.inst0
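The new retrieval_fail test relies on a stock mock behavior worth spelling out: when side_effect is an iterable, each call to the mock consumes the next item, and any item that is an exception class or instance is raised rather than returned. A minimal standalone sketch:

    from unittest import mock

    m = mock.Mock(side_effect=(ValueError('boom'), 42))
    try:
        m()
    except ValueError:
        pass          # first call: the exception instance is raised
    assert m() == 42  # second call: the plain value is returned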
22 changes: 22 additions & 0 deletions nova/tests/unit/scheduler/client/test_report.py
@@ -3406,6 +3406,28 @@ def test_delete_resource_provider_log_calls(self, mock_log, mock_delete):
         self.assertEqual(0, mock_log.info.call_count)
         self.assertEqual(1, mock_log.error.call_count)
 
+    @mock.patch("nova.scheduler.client.report.SchedulerReportClient.get")
+    def test_get_allocations_for_resource_provider(self, mock_get):
+        mock_get.return_value = fake_requests.FakeResponse(
+            200, content=jsonutils.dumps(
+                {'allocations': 'fake', 'resource_provider_generation': 42}))
+        ret = self.client.get_allocations_for_resource_provider(
+            self.context, 'rpuuid')
+        self.assertEqual('fake', ret.allocations)
+        mock_get.assert_called_once_with(
+            '/resource_providers/rpuuid/allocations',
+            global_request_id=self.context.global_id)
+
+    @mock.patch("nova.scheduler.client.report.SchedulerReportClient.get")
+    def test_get_allocations_for_resource_provider_fail(self, mock_get):
+        mock_get.return_value = fake_requests.FakeResponse(400, content="ouch")
+        self.assertRaises(exception.ResourceProviderAllocationRetrievalFailed,
+                          self.client.get_allocations_for_resource_provider,
+                          self.context, 'rpuuid')
+        mock_get.assert_called_once_with(
+            '/resource_providers/rpuuid/allocations',
+            global_request_id=self.context.global_id)
+
 
 class TestResourceClass(SchedulerReportClientTestCase):
     def setUp(self):
