Add base implementation for efficient cross-cell instance listing

This adds a new set of routines that can list instances efficiently across cells while retaining a stable sort ordering. The theory here is that we query the cells in parallel and merge-sort the results. This is not yet plugged into the actual list-instances API call because it lacks some important features (limit, markers), which are added in subsequent patches.

Change-Id: I3ffed6eb7008df9d728fe9728b368fec8d3434bc
openstack · Sep 21, 2017 · bac8f5b · bac8f5b
1 parent 0aeaa2b
commit bac8f5b
Show file tree

Hide file tree

Showing 3 changed files with 382 additions and 0 deletions.
diff --git a/nova/compute/instance_list.py b/nova/compute/instance_list.py
@@ -0,0 +1,119 @@
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import heapq
+
+from nova import context
+from nova import db
+
+
+class InstanceSortContext(object):
+    """Hold a requested sort order and compare instances against it.
+
+    :param sort_keys: ordered list of keys to compare instances by
+    :param sort_dirs: list of directions paired positionally with
+                      sort_keys; 'desc' means descending and any
+                      other value is treated as ascending
+    """
+
+    def __init__(self, sort_keys, sort_dirs):
+        self._sort_dirs = sort_dirs
+        self._sort_keys = sort_keys
+
+    def compare_instances(self, inst1, inst2):
+        """Return a cmp()-style ordering of inst1 relative to inst2.
+
+        Keys are examined in order and the first one whose values
+        differ decides the outcome: -1 if inst1 sorts before inst2 and
+        1 if it sorts after, with the sense flipped for keys whose
+        direction is 'desc'. If every key compares equal (or there are
+        no keys at all) the result is 0.
+
+        :param inst1: left-hand instance, indexable by each sort key
+        :param inst2: right-hand instance, indexable by each sort key
+        :returns: -1, 0 or 1
+        """
+        for key, direction in zip(self._sort_keys, self._sort_dirs):
+            descending = direction == 'desc'
+            if inst1[key] < inst2[key]:
+                return 1 if descending else -1
+            if inst1[key] > inst2[key]:
+                return -1 if descending else 1
+        # Every key matched, so the two instances are tied.
+        return 0
+
+
+class InstanceWrapper(object):
+    """Make a database instance orderable with the '<' operator.
+
+    heapq.merge() is used below to merge the per-cell results, and it
+    compares its items with the ordinary python operators. Raw
+    instances from the database cannot be compared that way (and the
+    right answer depends on the requested sort keys/dirs), so each
+    one is paired with a sort context that knows how to order it.
+
+    Only __lt__ is provided, which is all heapq.merge() needs.
+
+    :param sort_ctx: object providing compare_instances(inst1, inst2)
+    :param db_instance: the instance being wrapped
+    """
+
+    def __init__(self, sort_ctx, db_instance):
+        self._db_instance = db_instance
+        self._sort_ctx = sort_ctx
+
+    def __lt__(self, other):
+        # The sort context answers like cmp(x, y): exactly -1 means
+        # "x sorts before y", so nothing else counts as less-than.
+        return self._sort_ctx.compare_instances(
+            self._db_instance, other._db_instance) == -1
+
+
+def get_instances_sorted(ctx, filters, limit, marker, columns_to_join,
+                         sort_keys, sort_dirs):
+    """Generate a single sorted stream of instances from all cells.
+
+    Each cell is queried through context.scatter_gather_all_cells()
+    with the same filters and sort parameters, and the per-cell
+    results are combined with heapq.merge(). Because heapq.merge()
+    needs items that support '<', every database instance is wrapped
+    in an InstanceWrapper sharing one InstanceSortContext; the wrapper
+    is removed again before the instance is yielded, so callers see
+    the same kind of records that instance_get_all_by_filters_sort()
+    in the DB API returns.
+
+    :param ctx: request context handed to the scatter/gather routine
+    :param filters: filters passed unchanged to each cell query
+    :param limit: passed unchanged to each cell query
+    :param marker: passed unchanged to each cell query
+    :param columns_to_join: passed unchanged to each cell query
+    :param sort_keys: keys to sort by; if empty or None, the default
+                      of ['created_at', 'id'] ascending is used
+    :param sort_dirs: directions matching sort_keys; replaced along
+                      with sort_keys when the default is used
+    :returns: a generator of database instances
+
+    FIXME: Make limit work
+    FIXME: Make marker work
+    """
+
+    if not sort_keys:
+        # Mirror the default from process_sort_params() in the DB API.
+        # This only matters when the user did not ask for a specific
+        # ordering, but using the same scheme keeps us consistent.
+        sort_keys, sort_dirs = ['created_at', 'id'], ['asc', 'asc']
+
+    sort_ctx = InstanceSortContext(sort_keys, sort_dirs)
+
+    def do_query(ctx):
+        """Return a generator of wrapped instances from one cell.
+
+        This is run against each cell by the scatter_gather routine,
+        in the thread it creates, so the wrapping happens there and
+        the caller does not have to walk the combined results again.
+        The ctx received here is the one supplied by that routine and
+        intentionally shadows the outer ctx.
+        """
+
+        cell_instances = db.instance_get_all_by_filters_sort(
+            ctx, filters,
+            limit=limit, marker=marker,
+            columns_to_join=columns_to_join,
+            sort_keys=sort_keys,
+            sort_dirs=sort_dirs)
+        return (InstanceWrapper(sort_ctx, db_inst)
+                for db_inst in cell_instances)
+
+    # FIXME(danms): If we raise or timeout on a cell we need to handle
+    # that here gracefully. The below routine will provide sentinels
+    # to indicate that, which will crash the merge below, but we don't
+    # handle this anywhere yet anyway.
+    per_cell_results = context.scatter_gather_all_cells(ctx, do_query)
+
+    # Unwrap lazily while merging: the merge only advances as our
+    # caller consumes results, so handing back the inner instance
+    # instead of the wrapper costs essentially nothing.
+    for wrapped in heapq.merge(*per_cell_results.values()):
+        yield wrapped._db_instance
diff --git a/nova/tests/functional/compute/test_instance_list.py b/nova/tests/functional/compute/test_instance_list.py
@@ -0,0 +1,113 @@
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import datetime
+
+from nova.compute import instance_list
+from nova import context
+from nova import objects
+from nova import test
+
+
+class InstanceListTestCase(test.TestCase):
+    """Functional tests for cross-cell sorted instance listing."""
+
+    NUMBER_OF_CELLS = 3
+
+    def setUp(self):
+        super(InstanceListTestCase, self).setUp()
+
+        self.context = context.RequestContext('fake', 'fake')
+        self.num_instances = 3
+        self.instances = []
+
+        launched_at = datetime.datetime(1985, 10, 25, 1, 21, 0)
+        step = datetime.timedelta(minutes=10)
+
+        # Populate every cell except the first with instances. The
+        # first one stays empty so we also prove that an empty cell
+        # does not break the listing.
+        all_cells = objects.CellMappingList.get_all(self.context)
+        for cell in all_cells[1:]:
+            for index in range(self.num_instances):
+                self._create_mapped_instance(cell, index, launched_at)
+                launched_at += step
+
+    def _create_mapped_instance(self, cell, index, launched_at):
+        """Create one instance in the given cell plus its mapping.
+
+        The instance is also recorded in self.instances so tests can
+        compare against everything that was created.
+        """
+        with context.target_cell(self.context, cell) as cctx:
+            inst = objects.Instance(
+                context=cctx,
+                project_id=self.context.project_id,
+                user_id=self.context.user_id,
+                launched_at=launched_at,
+                instance_type_id=index,
+                hostname='%s-inst%i' % (cell.name, index))
+            inst.create()
+        self.instances.append(inst)
+        mapping = objects.InstanceMapping(context=self.context,
+                                          project_id=inst.project_id,
+                                          user_id=inst.user_id,
+                                          instance_uuid=inst.uuid,
+                                          cell_mapping=cell)
+        mapping.create()
+
+    def _list_uuids(self, filters, sort_keys, sort_dirs):
+        """Return the uuids from get_instances_sorted(), in order.
+
+        No limit, marker or joined columns are requested.
+        """
+        found = instance_list.get_instances_sorted(self.context, filters,
+                                                   None, None, [],
+                                                   sort_keys, sort_dirs)
+        return [inst['uuid'] for inst in found]
+
+    def test_get_sorted(self):
+        uuids = self._list_uuids({}, ['uuid'], ['asc'])
+        self.assertEqual(sorted(uuids), uuids)
+        self.assertEqual(len(self.instances), len(uuids))
+
+    def test_get_sorted_descending(self):
+        uuids = self._list_uuids({}, ['uuid'], ['desc'])
+        self.assertEqual(list(reversed(sorted(uuids))), uuids)
+        self.assertEqual(len(self.instances), len(uuids))
+
+    def test_get_sorted_with_filter(self):
+        uuids = self._list_uuids({'instance_type_id': 1},
+                                 ['uuid'], ['asc'])
+        expected = [inst['uuid'] for inst in self.instances
+                    if inst['instance_type_id'] == 1]
+        self.assertEqual(list(sorted(expected)), uuids)
+
+    def test_get_sorted_by_defaults(self):
+        # Only membership is checked here, not the order produced by
+        # the default sort keys.
+        uuids = set(self._list_uuids({}, None, None))
+        expected = set([inst['uuid'] for inst in self.instances])
+        self.assertEqual(expected, uuids)
diff --git a/nova/tests/unit/compute/test_instance_list.py b/nova/tests/unit/compute/test_instance_list.py
@@ -0,0 +1,150 @@
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import datetime
+import mock
+
+from nova.compute import instance_list
+from nova import objects
+from nova import test
+from nova.tests import fixtures
+from nova.tests import uuidsentinel as uuids
+
+
+class TestUtils(test.NoDBTestCase):
+    """Unit tests for InstanceSortContext and InstanceWrapper."""
+
+    def _assert_compare(self, inst1, inst2, cases):
+        """Check compare_instances() for each (keys, dirs, expected)."""
+        for sort_keys, sort_dirs, expected in cases:
+            ctx = instance_list.InstanceSortContext(sort_keys, sort_dirs)
+            self.assertEqual(expected,
+                             ctx.compare_instances(inst1, inst2))
+
+    def test_compare_simple(self):
+        dt1 = datetime.datetime(2015, 11, 5, 20, 30, 0)
+        dt2 = datetime.datetime(1955, 10, 25, 1, 21, 0)
+
+        inst1 = {'key0': 'foo', 'key1': 'd', 'key2': 456, 'key4': dt1}
+        inst2 = {'key0': 'foo', 'key1': 's', 'key2': 123, 'key4': dt2}
+
+        self._assert_compare(inst1, inst2, [
+            # Equal key0, inst1 == inst2
+            (['key0'], ['asc'], 0),
+            # Equal key0, inst1 == inst2 (direction should not matter)
+            (['key0'], ['desc'], 0),
+            # Ascending by key1, inst1 < inst2
+            (['key1'], ['asc'], -1),
+            # Descending by key1, inst2 < inst1
+            (['key1'], ['desc'], 1),
+            # Ascending by key2, inst2 < inst1
+            (['key2'], ['asc'], 1),
+            # Descending by key2, inst1 < inst2
+            (['key2'], ['desc'], -1),
+            # Ascending by key4, inst1 > inst2
+            (['key4'], ['asc'], 1),
+            # Descending by key4, inst1 < inst2
+            (['key4'], ['desc'], -1),
+        ])
+
+    def test_compare_multiple(self):
+        # key0 is identical on both, so key1 has to decide the order
+
+        inst1 = {'key0': 'foo', 'key1': 'd', 'key2': 456}
+        inst2 = {'key0': 'foo', 'key1': 's', 'key2': 123}
+
+        self._assert_compare(inst1, inst2, [
+            # Should be equivalent to ascending by key1
+            (['key0', 'key1'], ['asc', 'asc'], -1),
+            # Should be equivalent to descending by key1
+            (['key0', 'key1'], ['asc', 'desc'], 1),
+        ])
+
+    def test_wrapper(self):
+        inst1 = {'key0': 'foo', 'key1': 'd', 'key2': 456}
+        inst2 = {'key0': 'foo', 'key1': 's', 'key2': 123}
+
+        # Ascending on key1: inst1 sorts first
+        ascending = instance_list.InstanceSortContext(['key0', 'key1'],
+                                                      ['asc', 'asc'])
+        first = instance_list.InstanceWrapper(ascending, inst1)
+        second = instance_list.InstanceWrapper(ascending, inst2)
+        # Assert both directions so that a wrapper treating any
+        # nonzero cmp() result as "less than" would be caught
+        self.assertTrue(first < second)
+        self.assertFalse(second < first)
+
+        # Descending on key1: inst1 sorts last
+        descending = instance_list.InstanceSortContext(['key0', 'key1'],
+                                                       ['asc', 'desc'])
+        first = instance_list.InstanceWrapper(descending, inst1)
+        second = instance_list.InstanceWrapper(descending, inst2)
+        # Same two-way check, this time through the '>' operator
+        self.assertTrue(first > second)
+        self.assertFalse(second > first)
+
+
+class TestInstanceList(test.NoDBTestCase):
+    """Unit tests for get_instances_sorted() with the DB mocked out."""
+
+    def setUp(self):
+        super(TestInstanceList, self).setUp()
+
+        # Three fake cells named cell0..cell2
+        self.cells = []
+        for num in range(3):
+            self.cells.append(
+                objects.CellMapping(uuid=getattr(uuids, 'cell%i' % num),
+                                    name='cell%i' % num,
+                                    transport_url='fake:///',
+                                    database_connection='fake://'))
+
+        # Three fake instance records per cell, keyed by cell uuid
+        self.insts = {}
+        for cell in self.cells:
+            records = []
+            for num in range(3):
+                label = '%s-inst%i' % (cell.name, num)
+                records.append({'uuid': getattr(uuids, label),
+                                'hostname': label})
+            self.insts[cell.uuid] = records
+
+        self.context = mock.sentinel.context
+        self.useFixture(fixtures.SpawnIsSynchronousFixture())
+
+    @mock.patch('nova.db.instance_get_all_by_filters_sort')
+    @mock.patch('nova.objects.CellMappingList.get_all')
+    def test_get_instances_sorted(self, mock_cells, mock_inst):
+        mock_cells.return_value = self.cells
+        per_cell = self.insts.values()
+
+        def sorted_hostnames(cell_results):
+            # Each mocked DB call returns the next entry from
+            # cell_results
+            mock_inst.side_effect = cell_results
+            found = instance_list.get_instances_sorted(
+                self.context, {}, None, None, [], ['hostname'], ['asc'])
+            return [inst['hostname'] for inst in found]
+
+        forward = sorted_hostnames(per_cell)
+
+        # Hand the cells' results back in the opposite order; the
+        # merged output must still come out sorted the same way
+        flipped = list(per_cell)[::-1]
+        mock_inst.reset_mock()
+        backward = sorted_hostnames(flipped)
+
+        self.assertEqual(forward, backward)