From 945ab28df04e22f3c1b8948972f538ee6b5e7410 Mon Sep 17 00:00:00 2001
From: "Daniel P. Berrange"
Date: Wed, 25 Feb 2015 10:11:14 +0000
Subject: [PATCH] libvirt: fix disablement of NUMA & hugepages on unsupported
 platforms

Two previous commits updated the libvirt XML config generator so that
it would omit the config elements for huge pages and NUMA placement
when running on old libvirt:

  commit cf3a1262ecf12f4345326309c273722ebc26b466
  Author: Vladik Romanovsky
  Date:   Wed Jan 28 00:54:26 2015 -0500

      libvirt: avoid setting the memnodes where when it's not a supported
      option

  commit 3466e727546e0f7595378b8274254bed913f42ee
  Author: Vladik Romanovsky
  Date:   Tue Jan 6 14:23:38 2015 -0500

      libvirt: not setting membacking when mempages are empty host topology

The problem arising from this is that hosts still report to the
scheduler that they can support NUMA and huge pages whenever libvirt
>= 1.0.4, but we silently discard the guest config if libvirt < 1.2.7
(NUMA) or < 1.2.8 (huge pages). As a result the scheduler thinks the
host supports the requested feature and places the guest there, but
the guest that is actually launched is missing the feature, so the
user does not get what they requested.

The correct approach is to update the "_get_host_numa_topology" method
so that it does not report any NUMA topology in the first place if the
host is incapable of having guests configured in the way Nova
requires. Likewise, if the host cannot support the required huge page
configuration method, it should not report huge page availability. In
summary, we move the version checks added by the two commits above to
the point at which host capability reporting is done.

We also add a fatal error check in the guest XML config generator
methods, so that if some future scheduler bug causes host capabilities
to be ignored, we see a clear error instead of silently starting
guests with the wrong config.

There was a further mistake in that a version number check alone is
insufficient: we must also check that the hypervisor is QEMU or KVM,
to prevent these code paths from running on Xen or LXC.
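
As a rough illustration of the resulting pattern (a simplified,
hypothetical sketch, not the literal driver code: the standalone
functions and their signatures are invented for this example, though
the names mirror the helpers, constants and exceptions introduced
below):

    MIN_LIBVIRT_NUMA_VERSION = (1, 2, 7)

    def has_numa_support(version, virt_type):
        # The same gate is consulted at reporting time and at config
        # generation time: new enough libvirt, and QEMU/KVM only.
        return (version >= MIN_LIBVIRT_NUMA_VERSION and
                virt_type in ('qemu', 'kvm'))

    def get_host_numa_topology(version, virt_type, topology):
        # Reporting time: an unsupported host reports no NUMA topology
        # at all, so the scheduler never places a NUMA guest on it.
        if not has_numa_support(version, virt_type):
            return None
        return topology

    def get_guest_numa_config(version, virt_type, wants_numa):
        # Config generation time: a scheduler that ignores the report
        # triggers a clear error rather than a silently degraded guest.
        if wants_numa and not has_numa_support(version, virt_type):
            raise RuntimeError("Host does not support guests with "
                               "NUMA topology set")
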
Closes-bug: #1425115
Related-bug: #1408070
Related-bug: #1415333
Change-Id: I8326596696cbb030ae95d17f3dd430d05279081a
---
 nova/exception.py                           |   8 +
 nova/tests/unit/virt/libvirt/test_driver.py | 174 +++++++++++++-------
 nova/virt/libvirt/driver.py                 | 154 ++++++++++-------
 3 files changed, 221 insertions(+), 115 deletions(-)

diff --git a/nova/exception.py b/nova/exception.py
index ddfa728eb5c..7eea3925aa7 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -1861,3 +1861,11 @@ class UnsupportedPolicyException(Invalid):
 
 class CellMappingNotFound(NotFound):
     msg_fmt = _("Cell %(uuid)s has no mapping.")
+
+
+class NUMATopologyUnsupported(Invalid):
+    msg_fmt = _("Host does not support guests with NUMA topology set")
+
+
+class MemoryPagesUnsupported(Invalid):
+    msg_fmt = _("Host does not support guests with custom memory page sizes")
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 5fbe5e18472..daf6d02a16d 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -1175,32 +1175,32 @@ def test_get_guest_memory_backing_config_large_success(self, mock_version):
         self.assertEqual(2048, result.hugepages[0].size_kb)
         self.assertEqual([0], result.hugepages[0].nodeset)
 
-    def test_get_guest_memory_backing_config_mempages_none(self):
-        with mock.patch.object(host.Host, 'has_min_version',
-                               return_value=False):
-            self.assertIsNone(self._test_get_guest_memory_backing_config(
-                'not_empty', 'not_empty', 'not_empty'))
+    @mock.patch.object(host.Host,
+                       'has_min_version', return_value=True)
+    def test_get_guest_memory_backing_config_smallest(self, mock_version):
+        host_topology = objects.NUMATopology(
+            cells=[
+                objects.NUMACell(
+                    id=3, cpuset=set([1]), memory=1024, mempages=[
+                        objects.NUMAPagesTopology(size_kb=4, total=2000,
+                                                  used=0),
+                        objects.NUMAPagesTopology(size_kb=2048, total=512,
+                                                  used=0),
+                        objects.NUMAPagesTopology(size_kb=1048576, total=0,
+                                                  used=0),
+                    ])])
+        inst_topology = objects.InstanceNUMATopology(cells=[
+            objects.InstanceNUMACell(
+                id=3, cpuset=set([0, 1]), memory=1024, pagesize=4)])
 
-    def test_get_guest_numa_tune_memnodes(self):
-        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
-        guest_cpu_numa_config = objects.InstanceNUMATopology(cells=[
-            objects.InstanceNUMACell(
-                id=1, cpuset=set([0, 1]),
-                memory=1024, pagesize=2048)])
-        memnodes = [vconfig.LibvirtConfigGuestNUMATuneMemNode()]
-        with mock.patch.object(host.Host, 'has_min_version',
-                               return_value=True):
-            self.assertTrue(drvr._get_guest_numa_tune_memnodes(
-                guest_cpu_numa_config, memnodes))
-            self.assertEqual(guest_cpu_numa_config.cells[0].id,
-                             memnodes[0].cellid)
-
-    def test_get_guest_numa_tune_memnodes_none(self):
-        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
-        with mock.patch.object(host.Host, 'has_min_version',
-                               return_value=False):
-            self.assertFalse(drvr._get_guest_numa_tune_memnodes(
-                'something', 'something'))
+        numa_tune = vconfig.LibvirtConfigGuestNUMATune()
+        numa_tune.memnodes = [vconfig.LibvirtConfigGuestNUMATuneMemNode()]
+        numa_tune.memnodes[0].cellid = 0
+        numa_tune.memnodes[0].nodeset = [3]
+
+        result = self._test_get_guest_memory_backing_config(
+            host_topology, inst_topology, numa_tune)
+        self.assertIsNone(result)
 
     def test_get_guest_config_numa_host_instance_1pci_fits(self):
         instance_ref = objects.Instance(**self.test_instance)
@@ -1339,10 +1339,17 @@ def test_get_guest_config_numa_host_instance_2pci_no_fit(self):
         self.assertEqual(0, len(cfg.cputune.vcpupin))
         self.assertIsNone(cfg.cpu.numa)
 
-    def test_get_guest_config_numa_empty_memnode(self):
+    @mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
+    @mock.patch.object(host.Host, 'get_capabilities')
+    @mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled')
+    def _test_get_guest_config_numa_unsupported(self, fake_version,
+                                                exception_class, pagesize,
+                                                mock_host,
+                                                mock_caps, mock_version):
         instance_topology = objects.InstanceNUMATopology(
             cells=[objects.InstanceNUMACell(
-                id=0, cpuset=set([0]), memory=1024)])
+                id=0, cpuset=set([0]),
+                memory=1024, pagesize=pagesize)])
         instance_ref = objects.Instance(**self.test_instance)
         instance_ref.numa_topology = instance_topology
         image_meta = {}
@@ -1357,25 +1364,44 @@ def test_get_guest_config_numa_empty_memnode(self):
         caps.host.cpu.arch = "x86_64"
         caps.host.topology = self._fake_caps_numa_topology()
 
-        def fake_has_min_version(ver):
-            if ver == libvirt_driver.MIN_LIBVIRT_MEMNODE_VERSION:
-                return False
-            return True
+        mock_version.return_value = fake_version
+        mock_caps.return_value = caps
 
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
         disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                             instance_ref, image_meta)
 
-        with contextlib.nested(
-                mock.patch.object(host.Host, 'has_min_version',
-                                  side_effect=fake_has_min_version),
-                mock.patch.object(host.Host, "get_capabilities",
-                                  return_value=caps),
-                mock.patch.object(
-                    random, 'choice', side_effect=lambda cells: cells[0])):
-            cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info)
-            self.assertFalse(cfg.numatune.memnodes)
+        self.assertRaises(exception_class,
+                          drvr._get_guest_config,
+                          instance_ref, [], {}, disk_info)
+
+    def test_get_guest_config_numa_old_version(self):
+        self.flags(virt_type='kvm', group='libvirt')
+
+        self._test_get_guest_config_numa_unsupported(
+            utils.convert_version_to_int(
+                libvirt_driver.MIN_LIBVIRT_NUMA_VERSION) - 1,
+            exception.NUMATopologyUnsupported,
+            None)
+
+    def test_get_guest_config_numa_xen(self):
+        self.flags(virt_type='xen', group='libvirt')
+
+        self._test_get_guest_config_numa_unsupported(
+            utils.convert_version_to_int(
+                libvirt_driver.MIN_LIBVIRT_NUMA_VERSION),
+            exception.NUMATopologyUnsupported,
+            None)
+
+    def test_get_guest_config_numa_old_pages(self):
+        self.flags(virt_type='kvm', group='libvirt')
+
+        self._test_get_guest_config_numa_unsupported(
+            utils.convert_version_to_int(
+                libvirt_driver.MIN_LIBVIRT_HUGEPAGE_VERSION) - 1,
+            exception.MemoryPagesUnsupported,
+            2048)
 
     def test_get_guest_config_numa_host_instance_fit_w_cpu_pinset(self):
         instance_ref = objects.Instance(**self.test_instance)
@@ -8724,7 +8750,7 @@ def _fake_caps_numa_topology(self,
 
         return topology
 
-    def test_get_host_numa_topology(self):
+    def _test_get_host_numa_topology(self, mempages):
         caps = vconfig.LibvirtConfigCaps()
         caps.host = vconfig.LibvirtConfigCapsHost()
         caps.host.topology = self._fake_caps_numa_topology()
@@ -8744,8 +8770,6 @@ def test_get_host_numa_topology(self):
                      'mem': {'total': 1024, 'used': 0},
                      'id': 3}]}
         with contextlib.nested(
-                mock.patch.object(host.Host, 'has_min_version',
-                                  return_value=True),
                 mock.patch.object(host.Host, "get_capabilities",
                                   return_value=caps),
                 mock.patch.object(
@@ -8758,16 +8782,21 @@ def test_get_host_numa_topology(self):
             got_topo_dict = got_topo._to_dict()
             self.assertThat(
                 expected_topo_dict, matchers.DictMatches(got_topo_dict))
-            # cells 0
-            self.assertEqual(4, got_topo.cells[0].mempages[0].size_kb)
-            self.assertEqual(0, got_topo.cells[0].mempages[0].total)
-            self.assertEqual(2048, got_topo.cells[0].mempages[1].size_kb)
-            self.assertEqual(0, got_topo.cells[0].mempages[1].total)
-            # cells 1
-            self.assertEqual(4, got_topo.cells[1].mempages[0].size_kb)
-            self.assertEqual(1024, got_topo.cells[1].mempages[0].total)
-            self.assertEqual(2048, got_topo.cells[1].mempages[1].size_kb)
-            self.assertEqual(1, got_topo.cells[1].mempages[1].total)
+
+            if mempages:
+                # cells 0
+                self.assertEqual(4, got_topo.cells[0].mempages[0].size_kb)
+                self.assertEqual(0, got_topo.cells[0].mempages[0].total)
+                self.assertEqual(2048, got_topo.cells[0].mempages[1].size_kb)
+                self.assertEqual(0, got_topo.cells[0].mempages[1].total)
+                # cells 1
+                self.assertEqual(4, got_topo.cells[1].mempages[0].size_kb)
+                self.assertEqual(1024, got_topo.cells[1].mempages[0].total)
+                self.assertEqual(2048, got_topo.cells[1].mempages[1].size_kb)
+                self.assertEqual(1, got_topo.cells[1].mempages[1].total)
+            else:
+                self.assertEqual([], got_topo.cells[0].mempages)
+                self.assertEqual([], got_topo.cells[1].mempages)
 
             self.assertEqual(expected_topo_dict, got_topo_dict)
             self.assertEqual(set([]), got_topo.cells[0].pinned_cpus)
@@ -8777,6 +8806,22 @@
             self.assertEqual([set([0, 1])], got_topo.cells[0].siblings)
             self.assertEqual([], got_topo.cells[1].siblings)
 
+    @mock.patch.object(host.Host, 'has_min_version')
+    def test_get_host_numa_topology(self, mock_version):
+        def fake_version(ver):
+            return True
+
+        mock_version.side_effect = fake_version
+        self._test_get_host_numa_topology(mempages=True)
+
+    @mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
+    def test_get_host_numa_topology_no_mempages(self, mock_version):
+        self.flags(virt_type='kvm', group='libvirt')
+
+        mock_version.return_value = utils.convert_version_to_int(
+            libvirt_driver.MIN_LIBVIRT_HUGEPAGE_VERSION) - 1
+        self._test_get_host_numa_topology(mempages=False)
+
     def test_get_host_numa_topology_empty(self):
         caps = vconfig.LibvirtConfigCaps()
         caps.host = vconfig.LibvirtConfigCapsHost()
@@ -8791,12 +8836,25 @@ def test_get_host_numa_topology_empty(self):
             self.assertIsNone(drvr._get_host_numa_topology())
             get_caps.assert_called_once_with()
 
-    def test_get_host_numa_topology_not_supported(self):
-        # Tests that libvirt isn't new enough to support numa topology.
+    @mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
+    def test_get_host_numa_topology_old_version(self, mock_version):
+        self.flags(virt_type='kvm', group='libvirt')
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
-        with mock.patch.object(host.Host, 'has_min_version',
-                               return_value=False):
-            self.assertIsNone(drvr._get_host_numa_topology())
+
+        mock_version.return_value = utils.convert_version_to_int(
+            libvirt_driver.MIN_LIBVIRT_NUMA_VERSION) - 1
+        self.assertIsNone(drvr._get_host_numa_topology())
+
+    @mock.patch.object(host.Host, 'has_min_version')
+    def test_get_host_numa_topology_xen(self, mock_version):
+        self.flags(virt_type='xen', group='libvirt')
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+
+        def fake_version(ver):
+            return True
+
+        mock_version.side_effect = fake_version
+        self.assertIsNone(drvr._get_host_numa_topology())
 
     def test_diagnostic_vcpus_exception(self):
         xml = """
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index f65ee8ae28d..811abd5adbf 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -359,14 +359,20 @@ def repr_method(self):
 MIN_LIBVIRT_DISCARD_VERSION = (1, 0, 6)
 MIN_QEMU_DISCARD_VERSION = (1, 6, 0)
 REQ_HYPERVISOR_DISCARD = "QEMU"
-# libvirt numa topology support
-MIN_LIBVIRT_NUMA_TOPOLOGY_VERSION = (1, 0, 4)
+# While earlier versions could support NUMA reporting and
+# NUMA placement, not until 1.2.7 was there the ability
+# to pin guest nodes to host nodes, so mandate that. Without
+# this the scheduler cannot make guaranteed decisions, as the
+# guest placement may not match what was requested
+MIN_LIBVIRT_NUMA_VERSION = (1, 2, 7)
+# While earlier versions could support hugepage backed
+# guests, not until 1.2.8 was there the ability to request
+# a particular huge page size. Without this the scheduler
+# cannot make guaranteed decisions, as the huge page size
+# used by the guest may not match what was requested
+MIN_LIBVIRT_HUGEPAGE_VERSION = (1, 2, 8)
 # fsFreeze/fsThaw requirement
 MIN_LIBVIRT_FSFREEZE_VERSION = (1, 2, 5)
-# libvirt mempage size report
-MIN_LIBVIRT_MEMPAGES_VERSION = (1, 2, 8)
-# libvirt memory allocation policy for each guest NUMA node
-MIN_LIBVIRT_MEMNODE_VERSION = (1, 2, 7)
 # Hyper-V paravirtualized time source
 MIN_LIBVIRT_HYPERV_TIMER_VERSION = (1, 2, 2)
 
@@ -3426,6 +3432,15 @@ def _get_guest_numa_config(self, instance_numa_topology, flavor, pci_devs,
                     not support NUMA, then guest_cpu_tune and guest_numa_tune
                     will be None.
         """
+
+        if (not self._has_numa_support() and
+                instance_numa_topology is not None):
+            # We should not get here, since we should have avoided
+            # reporting NUMA topology from _get_host_numa_topology
+            # in the first place. Just in case of a scheduler
+            # mess up though, raise an exception
+            raise exception.NUMATopologyUnsupported()
+
         topology = self._get_host_numa_topology()
         # We have instance NUMA so translate it to the config class
         guest_cpu_numa_config = self._get_cpu_numa_config_from_instance(
@@ -3521,14 +3536,14 @@ def _get_guest_numa_config(self, instance_numa_topology, flavor, pci_devs,
                 guest_cpu_tune.vcpupin.sort(key=operator.attrgetter("id"))
 
                 guest_numa_tune.memory = numa_mem
+                guest_numa_tune.memnodes = numa_memnodes
 
                 # normalize cell.id
-                for i, cell in enumerate(guest_cpu_numa_config.cells):
+                for i, (cell, memnode) in enumerate(
+                        zip(guest_cpu_numa_config.cells,
+                            guest_numa_tune.memnodes)):
                     cell.id = i
-
-                guest_numa_tune.memnodes = self._get_guest_numa_tune_memnodes(
-                    guest_cpu_numa_config,
-                    numa_memnodes)
+                    memnode.cellid = i
 
                 return GuestNumaConfig(None, guest_cpu_tune,
                                        guest_cpu_numa_config,
@@ -3537,18 +3552,6 @@ def _get_guest_numa_config(self, instance_numa_topology, flavor, pci_devs,
             return GuestNumaConfig(allowed_cpus, None,
                                    guest_cpu_numa_config, None)
 
-    def _get_guest_numa_tune_memnodes(self, guest_cpu_numa_config,
-                                      numa_memnodes):
-        if not self._host.has_min_version(MIN_LIBVIRT_MEMNODE_VERSION):
-            return []
-
-        # normalize cell.id
-        for (cell, memnode) in zip(guest_cpu_numa_config.cells,
-                                   numa_memnodes):
-            memnode.cellid = cell.id
-
-        return numa_memnodes
-
     def _get_guest_os_type(self, virt_type):
         """Returns the guest OS type based on virt type."""
         if virt_type == "lxc":
@@ -3779,37 +3782,52 @@ def _set_qemu_guest_agent(self, guest, flavor, instance, img_meta_prop):
             self._add_rng_device(guest, flavor)
 
     def _get_guest_memory_backing_config(self, inst_topology, numatune):
-        if not self._host.has_min_version(MIN_LIBVIRT_MEMPAGES_VERSION):
+        wantsmempages = False
+        if inst_topology:
+            for cell in inst_topology.cells:
+                if cell.pagesize:
+                    wantsmempages = True
+
+        if not wantsmempages:
             return
 
+        if not self._has_hugepage_support():
+            # We should not get here, since we should have avoided
+            # reporting NUMA topology from _get_host_numa_topology
+            # in the first place. Just in case of a scheduler
+            # mess up though, raise an exception
+            raise exception.MemoryPagesUnsupported()
+
         host_topology = self._get_host_numa_topology()
-        membacking = None
-        if inst_topology and host_topology:
-            # Currently libvirt does not support the smallest
-            # pagesize set as a backend memory.
-            # https://bugzilla.redhat.com/show_bug.cgi?id=1173507
-            avail_pagesize = [page.size_kb
-                              for page in host_topology.cells[0].mempages]
-            avail_pagesize.sort()
-            smallest = avail_pagesize[0]
-
-            pages = []
-            for guest_cellid, inst_cell in enumerate(inst_topology.cells):
-                if inst_cell.pagesize and inst_cell.pagesize > smallest:
-                    for memnode in numatune.memnodes:
-                        if guest_cellid == memnode.cellid:
-                            page = (
-                                vconfig.LibvirtConfigGuestMemoryBackingPage())
-                            page.nodeset = [guest_cellid]
-                            page.size_kb = inst_cell.pagesize
-                            pages.append(page)
-                            break  # Quit early...
-            if pages:
-                membacking = vconfig.LibvirtConfigGuestMemoryBacking()
-                membacking.hugepages = pages
-
-        return membacking
+        if host_topology is None:
+            # As above, we should not get here but just in case...
+            raise exception.MemoryPagesUnsupported()
+
+        # Currently libvirt does not support the smallest
+        # pagesize set as a backend memory.
+        # https://bugzilla.redhat.com/show_bug.cgi?id=1173507
+        avail_pagesize = [page.size_kb
+                          for page in host_topology.cells[0].mempages]
+        avail_pagesize.sort()
+        smallest = avail_pagesize[0]
+
+        pages = []
+        for guest_cellid, inst_cell in enumerate(inst_topology.cells):
+            if inst_cell.pagesize and inst_cell.pagesize > smallest:
+                for memnode in numatune.memnodes:
+                    if guest_cellid == memnode.cellid:
+                        page = (
+                            vconfig.LibvirtConfigGuestMemoryBackingPage())
+                        page.nodeset = [guest_cellid]
+                        page.size_kb = inst_cell.pagesize
+                        pages.append(page)
+                        break  # Quit early...
+
+        if pages:
+            membacking = vconfig.LibvirtConfigGuestMemoryBacking()
+            membacking.hugepages = pages
+            return membacking
 
     def _get_flavor(self, ctxt, instance, flavor):
         if flavor is not None:
@@ -4701,8 +4719,26 @@ def _get_pci_passthrough_devices(self):
 
         return jsonutils.dumps(pci_info)
 
+    def _has_numa_support(self):
+        if not self._host.has_min_version(MIN_LIBVIRT_NUMA_VERSION):
+            return False
+
+        if CONF.libvirt.virt_type not in ['qemu', 'kvm']:
+            return False
+
+        return True
+
+    def _has_hugepage_support(self):
+        if not self._host.has_min_version(MIN_LIBVIRT_HUGEPAGE_VERSION):
+            return False
+
+        if CONF.libvirt.virt_type not in ['qemu', 'kvm']:
+            return False
+
+        return True
+
     def _get_host_numa_topology(self):
-        if not self._host.has_min_version(MIN_LIBVIRT_NUMA_TOPOLOGY_VERSION):
+        if not self._has_numa_support():
             return
 
         caps = self._host.get_capabilities()
@@ -4731,17 +4767,21 @@
             # Filter out singles and empty sibling sets that may be left
             siblings = [sib for sib in siblings if len(sib) > 1]
 
+            mempages = []
+            if self._has_hugepage_support():
+                mempages = [
+                    objects.NUMAPagesTopology(
+                        size_kb=pages.size,
+                        total=pages.total,
+                        used=0)
+                    for pages in cell.mempages]
+
             cell = objects.NUMACell(id=cell.id, cpuset=cpuset,
                                     memory=cell.memory / units.Ki,
                                     cpu_usage=0, memory_usage=0,
                                     siblings=siblings,
                                     pinned_cpus=set([]),
-                                    mempages=[
-                                        objects.NUMAPagesTopology(
-                                            size_kb=pages.size,
-                                            total=pages.total,
-                                            used=0)
-                                        for pages in cell.mempages])
+                                    mempages=mempages)
             cells.append(cell)
 
         return objects.NUMATopology(cells=cells)