# Copyright (c) 2011 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Manage hosts in the current zone.
"""

import collections
import UserDict

import iso8601
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_utils import timeutils

from nova.compute import task_states
from nova.compute import vm_states
from nova import context as ctxt_mod
from nova import exception
from nova.i18n import _, _LI, _LW
from nova import objects
from nova.pci import stats as pci_stats
from nova.scheduler import filters
from nova.scheduler import weights
from nova.virt import hardware


host_manager_opts = [
cfg.MultiStrOpt('scheduler_available_filters',
default=['nova.scheduler.filters.all_filters'],
help='Filter classes available to the scheduler which may '
'be specified more than once. An entry of '
'"nova.scheduler.filters.all_filters" '
'maps to all filters included with nova.'),
cfg.ListOpt('scheduler_default_filters',
default=[
'RetryFilter',
'AvailabilityZoneFilter',
'RamFilter',
'ComputeFilter',
'ComputeCapabilitiesFilter',
'ImagePropertiesFilter',
'ServerGroupAntiAffinityFilter',
'ServerGroupAffinityFilter',
],
help='Which filter class names to use for filtering hosts '
'when not specified in the request.'),
cfg.ListOpt('scheduler_weight_classes',
default=['nova.scheduler.weights.all_weighers'],
help='Which weight class names to use for weighing hosts'),
]

CONF = cfg.CONF
CONF.register_opts(host_manager_opts)

LOG = logging.getLogger(__name__)
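
# Illustrative nova.conf override for the options above (example values
# only). Since these options are registered without an option group, they
# live in the [DEFAULT] section:
#
#   [DEFAULT]
#   scheduler_default_filters = RetryFilter,ComputeFilter,RamFilter
#   scheduler_weight_classes = nova.scheduler.weights.all_weighers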


class ReadOnlyDict(UserDict.IterableUserDict):
    """A read-only dict."""
    def __init__(self, source=None):
        self.data = {}
        if source:
            self.data.update(source)

    def __setitem__(self, key, item):
        raise TypeError()

    def __delitem__(self, key):
        raise TypeError()

    def clear(self):
        raise TypeError()

    def pop(self, key, *args):
        raise TypeError()

    def popitem(self):
        raise TypeError()

    def update(self):
        raise TypeError()
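

# Illustrative ReadOnlyDict behaviour (example values):
#   d = ReadOnlyDict({'topic': 'compute'})
#   d['topic']         # -> 'compute'
#   d['topic'] = 'x'   # raises TypeError
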
# Representation of a single metric value from a compute node.
MetricItem = collections.namedtuple(
'MetricItem', ['value', 'timestamp', 'source'])
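
# Illustrative serialized form, as parsed below in
# HostState._update_metrics_from_compute_node (example values):
#   {"name": "cpu.frequency", "value": 2600,
#    "timestamp": "2015-01-01T00:00:00Z", "source": "libvirt"}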


class HostState(object):
    """Mutable and immutable information tracked for a host.

    This is an attempt to remove the ad-hoc data structures
    previously used and lock down access.
    """

    def __init__(self, host, node, compute=None):
        self.host = host
        self.nodename = node

        # Mutable available resources.
        # These will change as resources are virtually "consumed".
        self.total_usable_ram_mb = 0
        self.total_usable_disk_gb = 0
        self.disk_mb_used = 0
        self.free_ram_mb = 0
        self.free_disk_mb = 0
        self.vcpus_total = 0
        self.vcpus_used = 0
        self.numa_topology = None
        self.instance_numa_topology = None

        # Additional host information from the compute node stats:
        self.num_instances = 0
        self.num_io_ops = 0

        # Other information
        self.host_ip = None
        self.hypervisor_type = None
        self.hypervisor_version = None
        self.hypervisor_hostname = None
        self.cpu_info = None
        self.supported_instances = None

        # Resource oversubscription values for the compute host:
        self.limits = {}

        # Generic metrics from compute nodes
        self.metrics = {}

        # List of aggregates the host belongs to
        self.aggregates = []

        self.updated = None
        if compute:
            self.update_from_compute_node(compute)

    def update_service(self, service):
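        # Wrap the service record in ReadOnlyDict so filters can read it
        # but not mutate it.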
self.service = ReadOnlyDict(service)

    def _update_metrics_from_compute_node(self, compute):
"""Update metrics from a ComputeNode object."""
        # NOTE(llu): The 'or []' avoids a JSON decode failure when
        # compute.metrics is None, which can happen because the DB schema
        # allows NULL in the metrics column.
metrics = compute.metrics or []
if metrics:
metrics = jsonutils.loads(metrics)
for metric in metrics:
            # 'name', 'value', 'timestamp' and 'source' are all required
            # keys; just let KeyError happen if any of them is missing,
            # but additionally require 'name' to be non-empty.
name = metric['name']
item = MetricItem(value=metric['value'],
timestamp=metric['timestamp'],
source=metric['source'])
if name:
self.metrics[name] = item
else:
LOG.warning(_LW("Metric name unknown of %r"), item)

    def update_from_compute_node(self, compute):
"""Update information about a host from a ComputeNode object."""
if (self.updated and compute.updated_at
and self.updated > compute.updated_at):
return
all_ram_mb = compute.memory_mb
        # free_disk_gb is what the hypervisor reports as free, while
        # disk_available_least additionally assumes every sparse (qcow2)
        # disk grows to its full virtual size; the smaller of the two is
        # used below as the conservative estimate.
free_gb = compute.free_disk_gb
least_gb = compute.disk_available_least
if least_gb is not None:
if least_gb > free_gb:
                # Can occur when an instance recorded in the database is
                # not actually present on the host.
LOG.warning(_LW("Host %(hostname)s has more disk space than "
"database expected "
"(%(physical)sgb > %(database)sgb)"),
{'physical': least_gb, 'database': free_gb,
'hostname': compute.hypervisor_hostname})
free_gb = min(least_gb, free_gb)
free_disk_mb = free_gb * 1024
self.disk_mb_used = compute.local_gb_used * 1024
# NOTE(jogo) free_ram_mb can be negative
self.free_ram_mb = compute.free_ram_mb
self.total_usable_ram_mb = all_ram_mb
self.total_usable_disk_gb = compute.local_gb
self.free_disk_mb = free_disk_mb
self.vcpus_total = compute.vcpus
self.vcpus_used = compute.vcpus_used
self.updated = compute.updated_at
self.numa_topology = compute.numa_topology
self.instance_numa_topology = None
if compute.pci_device_pools is not None:
self.pci_stats = pci_stats.PciDeviceStats(
compute.pci_device_pools)
else:
self.pci_stats = None
# All virt drivers report host_ip
self.host_ip = compute.host_ip
self.hypervisor_type = compute.hypervisor_type
self.hypervisor_version = compute.hypervisor_version
self.hypervisor_hostname = compute.hypervisor_hostname
self.cpu_info = compute.cpu_info
if compute.supported_hv_specs:
self.supported_instances = [spec.to_list() for spec
in compute.supported_hv_specs]
else:
self.supported_instances = []
        # Don't merge the compute stats directly into HostState attributes,
        # where they could overwrite other values or be overwritten
        # themselves. Keep them in a dedicated dict so filters can still
        # schedule with them.
self.stats = compute.stats or {}
# Track number of instances on host
self.num_instances = int(self.stats.get('num_instances', 0))
self.num_io_ops = int(self.stats.get('io_workload', 0))
# update metrics
self._update_metrics_from_compute_node(compute)

    def consume_from_instance(self, instance):
"""Incrementally update host state from an instance."""
disk_mb = (instance['root_gb'] + instance['ephemeral_gb']) * 1024
ram_mb = instance['memory_mb']
vcpus = instance['vcpus']
self.free_ram_mb -= ram_mb
self.free_disk_mb -= disk_mb
self.vcpus_used += vcpus
now = timeutils.utcnow()
# NOTE(sbauza): Objects are UTC tz-aware by default
self.updated = now.replace(tzinfo=iso8601.iso8601.Utc())
# Track number of instances on host
self.num_instances += 1
instance_numa_topology = hardware.instance_topology_from_instance(
instance)
instance_cells = None
if instance_numa_topology:
instance_cells = instance_numa_topology.cells
pci_requests = instance.get('pci_requests')
# NOTE(danms): Instance here is still a dict, which is converted from
# an object. Thus, it has a .pci_requests field, which gets converted
# to a primitive early on, and is thus a dict here. Convert this when
# we get an object all the way to this path.
        if pci_requests and pci_requests['requests'] and self.pci_stats:
            self.pci_stats.apply_requests(pci_requests['requests'],
                                          instance_cells)
# Calculate the numa usage
instance['numa_topology'] = self.instance_numa_topology
updated_numa_topology = hardware.get_host_numa_usage_from_instance(
self, instance)
self.numa_topology = updated_numa_topology
vm_state = instance.get('vm_state', vm_states.BUILDING)
task_state = instance.get('task_state')
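        # Count building instances and disk-intensive task states as one
        # in-flight I/O operation each; num_io_ops feeds I/O-aware
        # scheduling (e.g. the io_ops filter and weigher).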
if vm_state == vm_states.BUILDING or task_state in [
task_states.RESIZE_MIGRATING, task_states.REBUILDING,
task_states.RESIZE_PREP, task_states.IMAGE_SNAPSHOT,
task_states.IMAGE_BACKUP, task_states.UNSHELVING,
task_states.RESCUING]:
self.num_io_ops += 1

    def __repr__(self):
return ("(%s, %s) ram:%s disk:%s io_ops:%s instances:%s" %
(self.host, self.nodename, self.free_ram_mb, self.free_disk_mb,
self.num_io_ops, self.num_instances))


class HostManager(object):
    """Base HostManager class."""

    # Can be overridden in a subclass
    def host_state_cls(self, host, node, **kwargs):
        return HostState(host, node, **kwargs)

    def __init__(self):
self.host_state_map = {}
self.filter_handler = filters.HostFilterHandler()
filter_classes = self.filter_handler.get_matching_classes(
CONF.scheduler_available_filters)
self.filter_cls_map = {cls.__name__: cls for cls in filter_classes}
self.filter_obj_map = {}
self.default_filters = self._choose_host_filters(
CONF.scheduler_default_filters)
self.weight_handler = weights.HostWeightHandler()
weigher_classes = self.weight_handler.get_matching_classes(
CONF.scheduler_weight_classes)
self.weighers = [cls() for cls in weigher_classes]
# Dict of aggregates keyed by their ID
self.aggs_by_id = {}
        # Dict of sets of aggregate IDs, keyed by the name of a host
        # belonging to those aggregates
self.host_aggregates_map = collections.defaultdict(set)
self._init_aggregates()

    def _init_aggregates(self):
elevated = ctxt_mod.get_admin_context()
aggs = objects.AggregateList.get_all(elevated)
for agg in aggs:
self.aggs_by_id[agg.id] = agg
for host in agg.hosts:
self.host_aggregates_map[host].add(agg.id)

    def update_aggregates(self, aggregates):
"""Updates internal HostManager information about aggregates."""
if isinstance(aggregates, (list, objects.AggregateList)):
for agg in aggregates:
self._update_aggregate(agg)
else:
self._update_aggregate(aggregates)

    def _update_aggregate(self, aggregate):
self.aggs_by_id[aggregate.id] = aggregate
for host in aggregate.hosts:
self.host_aggregates_map[host].add(aggregate.id)
        # Refresh the mapping to drop any host that is no longer part of
        # the aggregate.
for host in self.host_aggregates_map:
if (aggregate.id in self.host_aggregates_map[host]
and host not in aggregate.hosts):
self.host_aggregates_map[host].remove(aggregate.id)

    def delete_aggregate(self, aggregate):
"""Deletes internal HostManager information about a specific aggregate.
"""
if aggregate.id in self.aggs_by_id:
del self.aggs_by_id[aggregate.id]
for host in aggregate.hosts:
if aggregate.id in self.host_aggregates_map[host]:
self.host_aggregates_map[host].remove(aggregate.id)

    def _choose_host_filters(self, filter_cls_names):
        """Since the caller may specify which filters to use, we need an
        authoritative list of what is permissible. This function checks
        the requested filter names against the set of acceptable filters
        loaded from the scheduler_available_filters option.
        """
if not isinstance(filter_cls_names, (list, tuple)):
filter_cls_names = [filter_cls_names]
good_filters = []
bad_filters = []
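        # Filter objects are instantiated once and cached in
        # self.filter_obj_map so that later requests reuse them.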
for filter_name in filter_cls_names:
if filter_name not in self.filter_obj_map:
if filter_name not in self.filter_cls_map:
bad_filters.append(filter_name)
continue
filter_cls = self.filter_cls_map[filter_name]
self.filter_obj_map[filter_name] = filter_cls()
good_filters.append(self.filter_obj_map[filter_name])
if bad_filters:
msg = ", ".join(bad_filters)
raise exception.SchedulerHostFilterNotFound(filter_name=msg)
return good_filters

    def get_filtered_hosts(self, hosts, filter_properties,
                           filter_class_names=None, index=0):
"""Filter hosts and return only ones passing all filters."""
def _strip_ignore_hosts(host_map, hosts_to_ignore):
ignored_hosts = []
for host in hosts_to_ignore:
for (hostname, nodename) in host_map.keys():
if host == hostname:
del host_map[(hostname, nodename)]
ignored_hosts.append(host)
ignored_hosts_str = ', '.join(ignored_hosts)
msg = _('Host filter ignoring hosts: %s')
LOG.info(msg % ignored_hosts_str)

        def _match_forced_hosts(host_map, hosts_to_force):
forced_hosts = []
for (hostname, nodename) in host_map.keys():
if hostname not in hosts_to_force:
del host_map[(hostname, nodename)]
else:
forced_hosts.append(hostname)
if host_map:
forced_hosts_str = ', '.join(forced_hosts)
msg = _('Host filter forcing available hosts to %s')
else:
forced_hosts_str = ', '.join(hosts_to_force)
msg = _("No hosts matched due to not matching "
"'force_hosts' value of '%s'")
LOG.info(msg % forced_hosts_str)

        def _match_forced_nodes(host_map, nodes_to_force):
forced_nodes = []
for (hostname, nodename) in host_map.keys():
if nodename not in nodes_to_force:
del host_map[(hostname, nodename)]
else:
forced_nodes.append(nodename)
if host_map:
forced_nodes_str = ', '.join(forced_nodes)
msg = _('Host filter forcing available nodes to %s')
else:
forced_nodes_str = ', '.join(nodes_to_force)
msg = _("No nodes matched due to not matching "
"'force_nodes' value of '%s'")
LOG.info(msg % forced_nodes_str)

        if filter_class_names is None:
filters = self.default_filters
else:
filters = self._choose_host_filters(filter_class_names)
ignore_hosts = filter_properties.get('ignore_hosts', [])
force_hosts = filter_properties.get('force_hosts', [])
force_nodes = filter_properties.get('force_nodes', [])
if ignore_hosts or force_hosts or force_nodes:
# NOTE(deva): we can't assume "host" is unique because
# one host may have many nodes.
name_to_cls_map = {(x.host, x.nodename): x for x in hosts}
if ignore_hosts:
_strip_ignore_hosts(name_to_cls_map, ignore_hosts)
if not name_to_cls_map:
return []
# NOTE(deva): allow force_hosts and force_nodes independently
if force_hosts:
_match_forced_hosts(name_to_cls_map, force_hosts)
if force_nodes:
_match_forced_nodes(name_to_cls_map, force_nodes)
if force_hosts or force_nodes:
# NOTE(deva): Skip filters when forcing host or node
if name_to_cls_map:
return name_to_cls_map.values()
hosts = name_to_cls_map.itervalues()
return self.filter_handler.get_filtered_objects(filters,
hosts, filter_properties, index)

    def get_weighed_hosts(self, hosts, weight_properties):
"""Weigh the hosts."""
return self.weight_handler.get_weighed_objects(self.weighers,
hosts, weight_properties)

    def get_all_host_states(self, context):
        """Returns a list of HostStates that represents all the hosts
        the HostManager knows about. Each of the consumable resources
        in a HostState is pre-populated and adjusted based on data in
        the db.
        """
service_refs = {service.host: service
for service in objects.ServiceList.get_by_binary(
context, 'nova-compute')}
# Get resource usage across the available compute nodes:
compute_nodes = objects.ComputeNodeList.get_all(context)
seen_nodes = set()
for compute in compute_nodes:
service = service_refs.get(compute.host)
if not service:
LOG.warning(_LW(
"No compute service record found for host %(host)s"),
{'host': compute.host})
continue
host = compute.host
node = compute.hypervisor_hostname
state_key = (host, node)
host_state = self.host_state_map.get(state_key)
if host_state:
host_state.update_from_compute_node(compute)
else:
host_state = self.host_state_cls(host, node, compute=compute)
self.host_state_map[state_key] = host_state
            # Force-refresh the aggregates info on each new request, since
            # the aggregates may have changed since this field was first
            # set.
host_state.aggregates = [self.aggs_by_id[agg_id] for agg_id in
self.host_aggregates_map[
host_state.host]]
host_state.update_service(dict(service.iteritems()))
seen_nodes.add(state_key)
# remove compute nodes from host_state_map if they are not active
dead_nodes = set(self.host_state_map.keys()) - seen_nodes
for state_key in dead_nodes:
host, node = state_key
LOG.info(_LI("Removing dead compute node %(host)s:%(node)s "
"from scheduler"), {'host': host, 'node': node})
del self.host_state_map[state_key]
return self.host_state_map.itervalues()
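

# Illustrative scheduling flow built on this module (the context and
# filter_properties values here are hypothetical; see nova's
# FilterScheduler for the real caller):
#   mgr = HostManager()
#   hosts = mgr.get_all_host_states(context)
#   hosts = mgr.get_filtered_hosts(hosts, filter_properties)
#   weighed_hosts = mgr.get_weighed_hosts(hosts, filter_properties)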