# nova/virt/xenapi/host.py

# Copyright (c) 2012 Citrix Systems, Inc.
# Copyright 2010 OpenStack Foundation
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""
Management class for host-related functions (start, reboot, etc).
"""

import re

from os_xenapi.client import host_management
from os_xenapi.client import XenAPI
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import jsonutils


from nova.compute import task_states
from nova.compute import vm_states
from nova import context
from nova import exception
from nova.i18n import _
from nova import objects
from nova.objects import fields as obj_fields
from nova.virt.xenapi import pool_states
from nova.virt.xenapi import vm_utils

# Global oslo.config configuration object shared by the module.
CONF = cfg.CONF
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class Host(object):
    """Implements host related operations.

    Drives host-level actions through the XenAPI session: power actions,
    maintenance mode (guest evacuation), enabling/disabling the compute
    service and querying the host uptime.
    """
    def __init__(self, session, virtapi):
        self._session = session
        self._virtapi = virtapi

    @staticmethod
    def _unwrap_response(response, key):
        """Return response[key] when response is a dict, else response.

        The plugin call helpers return a decoded reply on success, but may
        also return '', None or the detail of a XenAPI failure. Those values
        have no .get(), so they are handed back untouched instead of raising
        AttributeError and hiding the real error.
        """
        if isinstance(response, dict):
            return response.get(key, response)
        return response

    def host_power_action(self, action):
        """Reboots or shuts down the host.

        :param action: "reboot" or "shutdown"; any other value raises
                       KeyError
        :returns: the "power_action" entry of the plugin reply when the
                  reply is a dict containing it, otherwise the reply itself
        """
        args = {"action": jsonutils.dumps(action)}
        methods = {"reboot": "host_reboot", "shutdown": "host_shutdown"}
        response = call_xenhost(self._session, methods[action], args)
        return self._unwrap_response(response, "power_action")

    def host_maintenance_mode(self, host, mode):
        """Start/Stop host maintenance window. On start, it triggers
        guest VMs evacuation.

        Every guest VM listed for this session is live-migrated to the
        first other host of the pool that accepts it. VMs that cannot be
        matched to an instance in the database are assumed to be worker VMs
        and are left alone.

        :param host: the compute host entering maintenance (source of VMs)
        :param mode: truthy to start maintenance, falsy to stop it
        :returns: 'off_maintenance' when mode is falsy, 'on_maintenance'
                  when every guest VM was migrated away
        :raises exception.NotFound: no pool aggregate was found for host
        :raises exception.NoValidHost: at least one guest VM could not be
                  migrated to any other host (including the case where the
                  pool has no other host)
        """
        if not mode:
            return 'off_maintenance'
        host_list = [host_ref for host_ref in
                     self._session.host.get_all()
                     if host_ref != self._session.host_ref]
        migrations_counter = vm_counter = 0
        ctxt = context.get_admin_context()
        for vm_ref, vm_rec in vm_utils.list_vms(self._session):
            # Ensure only guest instances are migrated. This is resolved
            # once per VM: it does not depend on the destination host.
            uuid = vm_rec['other_config'].get('nova_uuid')
            if not uuid:
                name = vm_rec['name_label']
                uuid = _uuid_find(ctxt, host, name)
                if not uuid:
                    LOG.info('Instance %(name)s running on '
                             '%(host)s could not be found in '
                             'the database: assuming it is a '
                             'worker VM and skipping migration '
                             'to a new host',
                             {'name': name, 'host': host})
                    continue
            instance = objects.Instance.get_by_uuid(ctxt, uuid)
            # Count each guest exactly once, however many destination hosts
            # end up being tried; otherwise a VM that only migrates on its
            # second attempt would make the final comparison fail.
            vm_counter += 1

            aggregate = objects.AggregateList.get_by_host(
                ctxt, host, key=pool_states.POOL_FLAG)
            if not aggregate:
                msg = _('Aggregate for host %(host)s could not be'
                        ' found.') % dict(host=host)
                raise exception.NotFound(msg)

            for host_ref in host_list:
                try:
                    dest = _host_find(ctxt, self._session, aggregate[0],
                                      host_ref)
                    instance.host = dest
                    instance.task_state = task_states.MIGRATING
                    instance.save()

                    self._session.VM.pool_migrate(vm_ref, host_ref,
                                                  {"live": "true"})
                    migrations_counter += 1

                    # NOTE(review): task_state is left at MIGRATING by this
                    # method - confirm it is reset elsewhere.
                    instance.vm_state = vm_states.ACTIVE
                    instance.save()

                    # This VM is done; do not try the remaining hosts.
                    break
                except XenAPI.Failure:
                    LOG.exception(_('Unable to migrate VM %(vm_ref)s '
                                    'from %(host)s'),
                                  {'vm_ref': vm_ref, 'host': host})
                    # Put the instance back on the source host before
                    # trying the next candidate.
                    instance.host = host
                    instance.vm_state = vm_states.ACTIVE
                    instance.save()

        if vm_counter == migrations_counter:
            return 'on_maintenance'
        else:
            raise exception.NoValidHost(reason=_('Unable to find suitable '
                                                 'host for VMs evacuation'))

    def set_host_enabled(self, enabled):
        """Sets the compute host's ability to accept new instances.

        :param enabled: True to enable the host, False to disable it
        :returns: the "status" entry of the plugin reply when the reply is
                  a dict containing it, otherwise the reply itself
        """
        # Since capabilities are gone, use service table to disable a node
        # in scheduler
        cntxt = context.get_admin_context()
        service = objects.Service.get_by_args(cntxt, CONF.host,
                                              'nova-compute')
        service.disabled = not enabled
        service.disabled_reason = 'set by xenapi host_state'
        service.save()

        response = _call_host_management(self._session,
                                         host_management.set_host_enabled,
                                         jsonutils.dumps(enabled))
        return self._unwrap_response(response, "status")

    def get_host_uptime(self):
        """Returns the result of calling "uptime" on the target host.

        :returns: the "uptime" entry of the plugin reply when the reply is
                  a dict containing it, otherwise the reply itself
        """
        response = _call_host_management(self._session,
                                         host_management.get_host_uptime)
        return self._unwrap_response(response, "uptime")


class HostState(object):
    """Manages information about the XenServer host this compute
    node is running on.

    The collected stats are cached in ``self._stats`` and refreshed by
    update_status(), which is also run once at construction time.
    """
    def __init__(self, session):
        super(HostState, self).__init__()
        self._session = session
        self._stats = {}
        self.update_status()

    def _get_passthrough_devices(self):
        """Get a list of pci devices that are available for pci passthrough.

        We use a plugin to get the output of the lspci command run on dom0.
        From this list we will extract pci devices that are using the pciback
        kernel driver.

        :returns: a list of pci devices on the node
        :raises exception.NovaException: a pciback device entry could not
                                         be parsed
        """
        def _compile_hex(pattern):
            r"""Return a compiled regular expression pattern into which we have
            replaced occurrences of hex by [\da-fA-F].
            """
            return re.compile(pattern.replace("hex", r"[\da-fA-F]"))

        # Compile the patterns once per call instead of once per device.
        slot_regex = _compile_hex(r"Slot:\t"
                                  r"((?:hex{4}:)?"  # Domain: (optional)
                                  r"hex{2}:"        # Bus:
                                  r"hex{2}\."       # Device.
                                  r"hex{1})")       # Function
        vendor_regex = _compile_hex(r"\nVendor:\t(hex+)")
        product_regex = _compile_hex(r"\nDevice:\t(hex+)")

        def _parse_pci_device_string(dev_string):
            r"""Extract information from the device string about the slot, the
            vendor and the product ID. The string is as follows:
                "Slot:\tBDF\nClass:\txxxx\nVendor:\txxxx\nDevice:\txxxx\n..."
            Return a dictionary with information about the device.
            """
            slot_id = slot_regex.findall(dev_string)
            vendor_id = vendor_regex.findall(dev_string)
            product_id = product_regex.findall(dev_string)

            if not slot_id or not vendor_id or not product_id:
                raise exception.NovaException(
                    _("Failed to parse information about"
                      " a pci device for passthrough"))

            type_pci = host_management.get_pci_type(self._session, slot_id[0])

            return {'label': '_'.join(['label',
                                       vendor_id[0],
                                       product_id[0]]),
                    'vendor_id': vendor_id[0],
                    'product_id': product_id[0],
                    'address': slot_id[0],
                    'dev_id': '_'.join(['pci', slot_id[0]]),
                    'dev_type': type_pci,
                    'status': 'available'}

        # Devices are separated by a blank line. That is why we
        # use "\n\n" as separator.
        lspci_out = host_management.get_pci_device_details(self._session)

        pci_list = lspci_out.split("\n\n")

        # For each device of the list, check if it uses the pciback
        # kernel driver and if it does, get information and add it
        # to the list of passthrough_devices. Ignore it if the driver
        # is not pciback.
        passthrough_devices = []

        for dev_string_info in pci_list:
            if "Driver:\tpciback" in dev_string_info:
                new_dev = _parse_pci_device_string(dev_string_info)
                passthrough_devices.append(new_dev)

        return passthrough_devices

    def _get_vgpu_stats(self):
        """Invoke XenAPI to get the stats for VGPUs.

        The return value is a dict which has GPU groups' uuid as
        the keys:
            dict(grp_uuid_1=dict_vgpu_stats_in_grp_1,
                 grp_uuid_2=dict_vgpu_stats_in_grp_2,
                 ...,
                 grp_uuid_n=dict_vgpu_stats_in_grp_n)
        The `dict_vgpu_stats_in_grp_x` is a dict representing the
        vGPU stats in GPU group x. For details, please refer to
        the return value of the function of _get_vgpu_stats_in_group().
        An empty dict is returned when no vGPU type is enabled in the
        configuration; groups without a matching type are omitted.
        """
        if not CONF.devices.enabled_vgpu_types:
            return {}

        vgpu_stats = {}

        # NOTE(jianghuaw): If there are multiple vGPU types enabled in
        # the configure option, we only choose the first one so that
        # we support only one vGPU type per compute node at the moment.
        # Once we switch to use the nested resource providers, we will
        # remove these lines to allow multiple vGPU types within multiple
        # GPU groups (each group has a different vGPU type enabled).
        if len(CONF.devices.enabled_vgpu_types) > 1:
            LOG.warning('XenAPI only supports one GPU type per compute node,'
                        ' only first type will be used.')
        cfg_enabled_types = CONF.devices.enabled_vgpu_types[:1]

        vgpu_grp_refs = self._session.call_xenapi('GPU_group.get_all')
        for ref in vgpu_grp_refs:
            grp_uuid = self._session.call_xenapi('GPU_group.get_uuid', ref)
            stat = self._get_vgpu_stats_in_group(ref, cfg_enabled_types)
            if stat:
                vgpu_stats[grp_uuid] = stat

        LOG.debug("Returning vGPU stats: %s", vgpu_stats)

        return vgpu_stats

    def _get_vgpu_stats_in_group(self, grp_ref, vgpu_types):
        """Get stats for the specified vGPU types in a GPU group.

        NOTE(Jianghuaw): In XenAPI, a GPU group is the minimal unit
        from where to create a vGPU for an instance. So here, we
        report vGPU resources for a particular GPU group. When we use
        nested resource providers to represent the vGPU resources,
        each GPU group will be a child resource provider under the
        compute node.

        The return value is a dict. For example:
        {'uuid': '6444c6ee-3a49-42f5-bebb-606b52175e67',
         'type_name': 'Intel GVT-g',
         'max_heads': 1,
         'total': 7,
         'remaining': 7,
         }
        None is returned when none of the requested vGPU types is enabled
        in the group.
        """
        type_refs_in_grp = self._session.call_xenapi(
            'GPU_group.get_enabled_VGPU_types', grp_ref)

        # Map each enabled type's model name to its reference.
        type_names_in_grp = {self._session.call_xenapi(
                                 'VGPU_type.get_model_name',
                                 type_ref): type_ref
                             for type_ref in type_refs_in_grp}
        # Get the vGPU types enabled both in this GPU group and in the
        # nova conf.
        enabled_types = set(vgpu_types) & set(type_names_in_grp)
        if not enabled_types:
            return

        stat = {}
        # Get the sorted enabled types, so that we can always choose the same
        # type when there are multiple enabled vGPU types.
        sorted_types = sorted(enabled_types)
        chosen_type = sorted_types[0]
        if len(sorted_types) > 1:
            LOG.warning('XenAPI only supports one vGPU type per GPU group,'
                        ' but enabled multiple vGPU types: %(available)s.'
                        ' Choosing the first one: %(chosen)s.',
                        dict(available=sorted_types,
                             chosen=chosen_type))
        type_ref = type_names_in_grp[chosen_type]
        type_uuid = self._session.call_xenapi('VGPU_type.get_uuid', type_ref)
        stat['uuid'] = type_uuid
        stat['type_name'] = chosen_type
        stat['max_heads'] = int(self._session.call_xenapi(
            'VGPU_type.get_max_heads', type_ref))

        stat['total'] = self._get_total_vgpu_in_grp(grp_ref, type_ref)
        stat['remaining'] = int(self._session.call_xenapi(
                                    'GPU_group.get_remaining_capacity',
                                    grp_ref,
                                    type_ref))
        return stat

    def _get_total_vgpu_in_grp(self, grp_ref, type_ref):
        """Get the total capacity of vGPUs in the group.

        Sums the maximum capacity for ``type_ref`` over every pGPU of the
        group that has that vGPU type enabled.
        """
        pgpu_recs = self._session.call_xenapi(
            'PGPU.get_all_records_where', 'field "GPU_group" = "%s"' % grp_ref)

        total = 0
        for pgpu_ref in pgpu_recs:
            pgpu_rec = pgpu_recs[pgpu_ref]
            if type_ref in pgpu_rec['enabled_VGPU_types']:
                cap = pgpu_rec['supported_VGPU_max_capacities'][type_ref]
                total += int(cap)
        return total

    def get_host_stats(self, refresh=False):
        """Return the current state of the host. If 'refresh' is
        True, run the update first.

        The update is also run when no stats have been cached yet.
        """
        if refresh or not self._stats:
            self.update_status()
        return self._stats

    def get_disk_used(self, sr_ref):
        """Since glance images are downloaded and snapshotted before they are
        used, only a small proportion of its VDI will be in use and it will
        never grow.  We only need to count the virtual size for disks that
        are attached to a VM - every other disk can count physical.

        :param sr_ref: reference of the storage repository to inspect
        :returns: tuple (allocated, physical_used) summed over the VDIs of
                  the SR; VDIs whose size cannot be read are logged and
                  skipped
        """

        def _vdi_attached(vdi_ref):
            try:
                vbds = self._session.VDI.get_VBDs(vdi_ref)
                for vbd in vbds:
                    if self._session.VBD.get_currently_attached(vbd):
                        return True
            except self._session.XenAPI.Failure:
                # VDI or VBD may no longer exist - in which case, it's
                # not attached
                pass
            return False

        allocated = 0
        physical_used = 0

        all_vdis = self._session.SR.get_VDIs(sr_ref)
        for vdi_ref in all_vdis:
            try:
                vdi_physical = \
                    int(self._session.VDI.get_physical_utilisation(vdi_ref))
                if _vdi_attached(vdi_ref):
                    allocated += \
                        int(self._session.VDI.get_virtual_size(vdi_ref))
                else:
                    allocated += vdi_physical
                physical_used += vdi_physical
            except (ValueError, self._session.XenAPI.Failure):
                LOG.exception(_('Unable to get size for vdi %s'), vdi_ref)

        return (allocated, physical_used)

    def update_status(self):
        """Since under Xenserver, a compute node runs on a given host,
        we can get host status information using xenapi.

        On success the freshly gathered data replaces ``self._stats``. When
        the plugin returns nothing usable the cached stats are left as is.
        """
        LOG.debug("Updating host stats")
        data = _call_host_management(self._session,
                                     host_management.get_host_data)
        if not data:
            return
        if not isinstance(data, dict):
            # The plugin helper returns the failure detail instead of a
            # decoded dict when the XenAPI call fails; item assignment on
            # it would raise TypeError, so keep the previous stats.
            LOG.warning('Ignoring unexpected host data while updating '
                        'host stats: %s', data)
            return

        sr_ref = vm_utils.scan_default_sr(self._session)
        sr_rec = self._session.SR.get_record(sr_ref)
        total = int(sr_rec["physical_size"])
        (allocated, used) = self.get_disk_used(sr_ref)
        data["disk_total"] = total
        data["disk_used"] = used
        data["disk_allocated"] = allocated
        data["disk_available"] = total - used
        data["supported_instances"] = to_supported_instances(
            data.get("host_capabilities")
        )
        data["cpu_model"] = to_cpu_model(
            data.get("host_cpu_info")
        )
        # Flatten the nested memory dict into top-level keys.
        host_memory = data.get('host_memory', None)
        if host_memory:
            data["host_memory_total"] = host_memory.get('total', 0)
            data["host_memory_overhead"] = host_memory.get('overhead', 0)
            data["host_memory_free"] = host_memory.get('free', 0)
            data["host_memory_free_computed"] = host_memory.get(
                                                'free-computed', 0)
            del data['host_memory']
        # Keep reporting the hostname seen first: a changed hostname only
        # takes effect after a restart.
        if (data['host_hostname'] !=
                self._stats.get('host_hostname', data['host_hostname'])):
            LOG.error('Hostname has changed from %(old)s to %(new)s. '
                      'A restart is required to take effect.',
                      {'old': self._stats['host_hostname'],
                       'new': data['host_hostname']})
            data['host_hostname'] = self._stats['host_hostname']
        data['hypervisor_hostname'] = data['host_hostname']
        vcpus_used = 0
        for vm_ref, vm_rec in vm_utils.list_vms(self._session):
            vcpus_used = vcpus_used + int(vm_rec['VCPUs_max'])
        data['vcpus_used'] = vcpus_used
        data['pci_passthrough_devices'] = self._get_passthrough_devices()
        data['vgpu_stats'] = self._get_vgpu_stats()
        self._stats = data


def to_supported_instances(host_capabilities):
    """Translate host capability strings into supported-instance tuples.

    Each capability is split on "-" into exactly three parts: the OS type,
    a version (ignored) and the guest architecture. Capabilities for which
    that handling raises ValueError are logged and skipped.

    :param host_capabilities: iterable of capability strings; may be None
                              or empty
    :returns: list of (guestarch, obj_fields.HVType.XEN, ostype) tuples
    """
    supported = []
    for capability in host_capabilities or ():
        try:
            # The capability may be unicode, but arch/ostype should be
            # plain strings to match the standard constants.
            capability = str(capability)
            os_kind, _ignored, arch = capability.split("-")
            entry = (obj_fields.Architecture.canonicalize(arch),
                     obj_fields.HVType.XEN,
                     obj_fields.VMMode.canonicalize(os_kind))
        except ValueError:
            LOG.warning("Failed to extract instance support from %s",
                        capability)
            continue
        supported.append(entry)
    return supported


def to_cpu_model(host_cpu_info):
    """Convert the XenAPI host CPU info into a cpu_info dict.

    :param host_cpu_info: dict in the format shown below, or None
    :returns: None when host_cpu_info is None, otherwise a dict with the
              keys "model", "vendor", "topology" (a dict of integer
              "sockets", "cores" and "threads") and "features" (list of
              flag names)
    """
    # The XenAPI driver returns data in the format
    #
    # {"physical_features": "0098e3fd-bfebfbff-00000001-28100800",
    #  "modelname": "Intel(R) Xeon(R) CPU           X3430  @ 2.40GHz",
    #  "vendor": "GenuineIntel",
    #  "features": "0098e3fd-bfebfbff-00000001-28100800",
    #  "family": 6,
    #  "maskable": "full",
    #  "cpu_count": 4,
    #  "socket_count": "1",
    #  "flags": "fpu de tsc msr pae mce cx8 apic sep mtrr mca cmov
    #            pat clflush acpi mmx fxsr sse sse2 ss ht nx
    #            constant_tsc nonstop_tsc aperfmperf pni vmx est
    #            ssse3 sse4_1 sse4_2 popcnt hypervisor ida
    #            tpr_shadow vnmi flexpriority ept vpid",
    #  "stepping": 5,
    #  "model": 30,
    #  "features_after_reboot": "0098e3fd-bfebfbff-00000001-28100800",
    #  "speed": "2394.086"}

    if host_cpu_info is None:
        return None

    cpu_info = dict()
    # TODO(berrange) the data we're putting in model is not
    # exactly comparable to what libvirt puts in model. The
    # libvirt model names are a well defined short string
    # which is really an alias for a particular set of
    # feature flags. The Xen model names are raw printable
    # strings from the kernel with no specific semantics
    cpu_info["model"] = host_cpu_info["modelname"]
    cpu_info["vendor"] = host_cpu_info["vendor"]
    # TODO(berrange) perhaps we could fill in 'arch' field too
    # by looking at 'host_capabilities' for the Xen host ?

    sockets = int(host_cpu_info["socket_count"])
    topology = dict()
    topology["sockets"] = sockets
    # Floor division keeps the core count an integer on Python 3 as well,
    # where "/" would produce a float such as 4.0.
    topology["cores"] = int(host_cpu_info["cpu_count"]) // sockets
    # TODO(berrange): if 'ht' is present in the 'flags' list
    # is it possible to infer that the 'cpu_count' is in fact
    # sockets * cores * threads ? Unclear if 'ht' would remain
    # visible when threads are disabled in BIOS ?
    topology["threads"] = 1

    cpu_info["topology"] = topology

    # split() without argument tolerates repeated or odd whitespace
    # between flags instead of yielding empty feature names.
    cpu_info["features"] = host_cpu_info["flags"].split()

    return cpu_info


def call_xenhost(session, method, arg_dict):
    """Call a method of the xenhost.py plugin and decode its JSON reply.

    Shared handling for the several callers that talk to the xenhost
    plugin.

    :param session: session object providing call_plugin()
    :param method: name of the plugin method to invoke
    :param arg_dict: arguments handed to the plugin as ``args``
    :returns: the decoded reply; '' when the plugin returned nothing; None
              when a ValueError was raised (logged); or ``details[1]`` of
              the XenAPI failure when the call failed (logged)
    """
    try:
        raw_reply = session.call_plugin('xenhost.py', method, args=arg_dict)
        outcome = jsonutils.loads(raw_reply) if raw_reply else ''
    except ValueError:
        LOG.exception(_("Unable to get updated status"))
        outcome = None
    except session.XenAPI.Failure as failure:
        LOG.error("The call to %(method)s returned "
                  "an error: %(e)s.", {'method': method, 'e': failure})
        outcome = failure.details[1]
    return outcome


def _call_host_management(session, method, *args):
    """There will be several methods that will need this general
    handling for interacting with the dom0 plugin, so this abstracts
    out that behavior. the call_xenhost will be removed once we deprecated
    those functions which are not needed anymore
    """
    try:
        result = method(session, *args)
        if not result:
            return ''
        return jsonutils.loads(result)
    except ValueError:
        LOG.exception(_("Unable to get updated status"))
        return None
    except session.XenAPI.Failure as e:
        LOG.error("The call to %(method)s returned an error: %(e)s.",
                  {'method': method.__name__, 'e': e})
        return e.details[1]


def _uuid_find(context, host, name_label):
    """Return the uuid of the instance on host named name_label.

    :returns: the uuid of the first instance on ``host`` whose name equals
              ``name_label``, or None when there is no such instance
    """
    instances_on_host = objects.InstanceList.get_by_host(context, host)
    return next((inst.uuid for inst in instances_on_host
                 if inst.name == name_label), None)


def _host_find(context, session, src_aggregate, host_ref):
    """Return the host from the xenapi host reference.

    :param src_aggregate: the aggregate that the compute host being put in
                          maintenance (source of VMs) belongs to
    :param host_ref: the hypervisor host reference (destination of VMs)

    :return: the compute host that manages host_ref
    """
    # NOTE: this would be a lot simpler if nova-compute stored
    # CONF.host in the XenServer host's other-config map.
    # TODO(armando-migliaccio): improve according the note above
    uuid = session.host.get_uuid(host_ref)
    for compute_host, host_uuid in src_aggregate.metadetails.items():
        if host_uuid == uuid:
            return compute_host
    raise exception.NoValidHost(reason='Host %(host_uuid)s could not be found '
                                'from aggregate metadata: %(metadata)s.' %
                                {'host_uuid': uuid,
                                 'metadata': src_aggregate.metadetails})