Override update_available_resources interval
Add a config option 'update_resources_interval' to be able to set how
often the update_available_resources periodic task should run.

When a single nova-compute is managing a lot of nodes, we need the
ability to tune the frequency of this task.

The default value of 0 means the task runs on every periodic task
interval (the current behavior). A negative value prevents it from
running at all, and a positive value sets the number of seconds
between calls.
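
As a rough sketch of these semantics (the helper name and the assumed
60-second default periodic tick are illustrative only, not nova's
actual periodic_task implementation):

    # Illustrative sketch of the 0 / negative / positive semantics above.
    DEFAULT_PERIODIC_INTERVAL = 60.0  # assumed default periodic tick, in seconds

    def effective_spacing(update_resources_interval):
        """Map the config value to a run interval; None means never run."""
        if update_resources_interval < 0:
            return None                          # negative: task disabled
        if update_resources_interval == 0:
            return DEFAULT_PERIODIC_INTERVAL     # 0: run on every periodic tick
        return float(update_resources_interval)  # positive: roughly that many seconds

    assert effective_spacing(-1) is None
    assert effective_spacing(0) == 60.0
    assert effective_spacing(600) == 600.0

For example, an operator who wants this task to run roughly every ten
minutes would set update_resources_interval = 600 in nova.conf.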

DocImpact
This adds a new CONF option 'update_resources_interval' that controls
how often the update_available_resource() method is called.

Co-Authored-By: Ed Leafe <ed@leafe.com>

Change-Id: I8986d8d97dbf348e219c5f1f0a62092115c096d5
EdLeafe committed Mar 23, 2015
1 parent fd6071c commit 0279506
Showing 2 changed files with 96 additions and 7 deletions.
42 changes: 35 additions & 7 deletions nova/compute/manager.py
@@ -180,6 +180,14 @@
'that its view of instances is in sync with nova. If the '
'CONF option `scheduler_tracks_instance_changes` is '
'False, changing this option will have no effect.'),
cfg.IntOpt('update_resources_interval',
default=0,
help='Interval in seconds for updating compute resources. A '
'number less than 0 means to disable the task completely. '
'Leaving this at the default of 0 will cause this to run '
'at the default periodic interval. Setting it to any '
'positive value will cause it to run at intervals of '
'approximately that many seconds.'),
]

timeout_opts = [
@@ -6171,7 +6179,7 @@ def _reclaim_queued_deletes(self, context):
"instance: %s"),
e, instance=instance)

@periodic_task.periodic_task(spacing=CONF.update_resources_interval)
def update_available_resource(self, context):
"""See driver.get_available_resource()
@@ -6181,23 +6189,43 @@ def update_available_resource(self, context):
:param context: security context
"""
new_resource_tracker_dict = {}

# Delete orphan compute node not reported by driver but still in db
compute_nodes_in_db = self._get_compute_nodes_in_db(context,
use_slave=True)
nodenames = set(self.driver.get_available_nodes())
for nodename in nodenames:
rt = self._get_resource_tracker(nodename)
try:
rt.update_available_resource(context)
except exception.ComputeHostNotFound:
# NOTE(comstud): We can get to this case if a node was
# marked 'deleted' in the DB and then re-added with a
# different auto-increment id. The cached resource
# tracker tried to update a deleted record and failed.
# Don't add this resource tracker to the new dict, so
# that this will resolve itself on the next run.
LOG.info(_LI("Compute node '%s' not found in "
"update_available_resource."), nodename)
continue
except Exception as e:
LOG.error(_LE("Error updating resources for node "
"%(node)%s: %(e)s"),
{'node': nodename, 'e': e})
new_resource_tracker_dict[nodename] = rt

# NOTE(comstud): Replace the RT cache before looping through
# compute nodes to delete below, as we can end up doing greenthread
# switches there. Best to have everyone using the newest cache
# ASAP.
self._resource_tracker_dict = new_resource_tracker_dict

# Delete orphan compute node not reported by driver but still in db
for cn in compute_nodes_in_db:
if cn.hypervisor_hostname not in nodenames:
LOG.info(_LI("Deleting orphan compute node %s") % cn.id)
cn.destroy()

def _get_compute_nodes_in_db(self, context, use_slave=False):
try:
return objects.ComputeNodeList.get_all_by_host(context, self.host,
61 changes: 61 additions & 0 deletions nova/tests/unit/compute/test_compute_mgr.py
@@ -52,6 +52,7 @@
from nova import utils
from nova.virt import driver as virt_driver
from nova.virt import event as virtevent
from nova.virt import fake as fake_driver
from nova.virt import hardware


@@ -122,6 +123,66 @@ def _mark_shutdown(*args, **kwargs):
'_shutdown_instance', 'delete'],
methods_called)

@mock.patch.object(manager.ComputeManager, '_get_resource_tracker')
@mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
@mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db')
def test_update_available_resource(self, get_db_nodes, get_avail_nodes,
get_rt):
info = {'cn_id': 1}

def _make_compute_node(hyp_hostname):
cn = mock.Mock(spec_set=['hypervisor_hostname', 'id',
'destroy'])
cn.id = info['cn_id']
info['cn_id'] += 1
cn.hypervisor_hostname = hyp_hostname
return cn

def _make_rt(node):
n = mock.Mock(spec_set=['update_available_resource',
'nodename'])
n.nodename = node
return n

ctxt = mock.Mock()
db_nodes = [_make_compute_node('node1'),
_make_compute_node('node2'),
_make_compute_node('node3'),
_make_compute_node('node4')]
avail_nodes = set(['node2', 'node3', 'node4', 'node5'])
avail_nodes_l = list(avail_nodes)
rts = [_make_rt(node) for node in avail_nodes_l]
# Make the 2nd and 3rd ones raise
exc = exception.ComputeHostNotFound(host='fake')
rts[1].update_available_resource.side_effect = exc
exc = test.TestingException()
rts[2].update_available_resource.side_effect = exc
rts_iter = iter(rts)

def _get_rt_side_effect(*args, **kwargs):
return next(rts_iter)

expected_rt_dict = {avail_nodes_l[0]: rts[0],
avail_nodes_l[2]: rts[2],
avail_nodes_l[3]: rts[3]}
get_db_nodes.return_value = db_nodes
get_avail_nodes.return_value = avail_nodes
get_rt.side_effect = _get_rt_side_effect
self.compute.update_available_resource(ctxt)
get_db_nodes.assert_called_once_with(ctxt, use_slave=True)
self.assertEqual([mock.call(node) for node in avail_nodes],
get_rt.call_args_list)
for rt in rts:
rt.update_available_resource.assert_called_once_with(ctxt)
self.assertEqual(expected_rt_dict,
self.compute._resource_tracker_dict)
# node1 is in the DB but not reported by the driver, so it
# should have been deleted
for db_node in db_nodes:
if db_node.hypervisor_hostname == 'node1':
db_node.destroy.assert_called_once_with()
else:
self.assertFalse(db_node.destroy.called)

def test_allocate_network_succeeds_after_retries(self):
self.flags(network_allocate_retries=8)

