Skip to content

Commit

Permalink
Amend metadata table for "explain" support. (#141)
Browse files Browse the repository at this point in the history
* Amend metadata table for "explain" support.

* QueryMetadataTable.get_explain_infos() returns information required for explain analysis.
It is in the form of a list of ExplainInfo objects.

* ExplainFilterInfo provides:
  - op_name: The operation name is useful for estimating complexity, e.g. == is faster than between.
  - field_name: For checking if the field has index or not.
  - args: If there are $args, we can check the size of user provided input.

* ExplainRecurseInfo provides the recursion depth for now.

* fix ExplainFilterInfo -> FilterInfo.

* fix lint error

* remove unused import

* lint issue

* sort imports

* Cleanup explain infos.

- metadata table now has separate calls for FilterInfo and RecurseInfo
- Unittest checks if any extra infos appear.
- Unittest is deriving from the class and using self.assertEqual
- setdefault is used instead of defaultdict

* fix unused import

* Allow multiple RecurseInfos.

We only keep vertices in the metadata table. Multiple recursions at
the same vertex for different edges should be recorded together.
Name and direction of each edge is also added to the RecurseInfo.

* unit test cleanup

- test method names reflect test query name from test_input_data
- has_edge_degree test added
- multiple filters on same field test added
- test expectations are simplified with variables

* fix lint spacing

* review fixes

- FilterInfo field_name renamed to fields and is now a tuple. Most operators have one field, but name_or_alias sets the (name, alias) tuple here. This makes it more consistent for callers to process a filter's fields.
- FilterInfo args field is now a tuple instead of list.
- assertEqual argument order is fixed for better error display in tests.
- One more test case for name_or_alias
- Test expectation indentations improved.

* fix lint errors

* Simplify statement.

* fix indent
  • Loading branch information
gurer-kensho authored and obi1kenobi committed Oct 23, 2018
1 parent 695fa5a commit 4a97610
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 5 deletions.
8 changes: 6 additions & 2 deletions graphql_compiler/compiler/compiler_frontend.py
Expand Up @@ -87,7 +87,7 @@
invert_dict, is_vertex_field_name, strip_non_null_from_type, validate_output_name,
validate_safe_string
)
from .metadata import LocationInfo, QueryMetadataTable
from .metadata import LocationInfo, QueryMetadataTable, RecurseInfo


# LocationStackEntry contains the following:
Expand Down Expand Up @@ -488,6 +488,10 @@ def _compile_vertex_ast(schema, current_schema_type, ast,
edge_name,
recurse_depth,
within_optional_scope=within_optional_scope))
query_metadata_table.record_recurse_info(location,
RecurseInfo(edge_direction=edge_direction,
edge_name=edge_name,
depth=recurse_depth))
else:
basic_blocks.append(blocks.Traverse(edge_direction, edge_name,
optional=edge_traversal_is_optional,
Expand Down Expand Up @@ -642,7 +646,7 @@ def _compile_ast_node_to_ir(schema, current_schema_type, ast, location, context)
# step 1: apply local filter, if any
for filter_operation_info in filter_operations:
basic_blocks.append(
process_filter_directive(filter_operation_info, context))
process_filter_directive(filter_operation_info, location, context))

if location.field is not None:
# The location is at a property, compile the property data following P-steps.
Expand Down
13 changes: 11 additions & 2 deletions graphql_compiler/compiler/filters.py
Expand Up @@ -11,6 +11,7 @@
get_uniquely_named_objects_by_name, is_vertex_field_name, is_vertex_field_type,
strip_non_null_from_type, validate_safe_string
)
from .metadata import FilterInfo


def scalar_leaf_only(operator):
Expand Down Expand Up @@ -593,15 +594,15 @@ def is_filter_with_outer_scope_vertex_field_operator(directive):
return op_name in OUTER_SCOPE_VERTEX_FIELD_OPERATORS


def process_filter_directive(filter_operation_info, context):
def process_filter_directive(filter_operation_info, location, context):
"""Return a Filter basic block that corresponds to the filter operation in the directive.
Args:
filter_operation_info: FilterOperationInfo object, containing the directive and field info
of the field where the filter is to be applied.
location: Location where this filter is used.
context: dict, various per-compilation data (e.g. declared tags, whether the current block
is optional, etc.). May be mutated in-place in this function!
directive: GraphQL @filter directive object, obtained from the AST node
Returns:
a Filter basic block that performs the requested filtering operation
Expand Down Expand Up @@ -641,4 +642,12 @@ def process_filter_directive(filter_operation_info, context):
raise GraphQLCompilationError(u'The filter with op_name "{}" must be applied on a field. '
u'It may not be applied on a type coercion.'.format(op_name))

fields = ((filter_operation_info.field_name,) if op_name != 'name_or_alias'
else ('name', 'alias'))

context['metadata'].record_filter_info(
location,
FilterInfo(fields=fields, op_name=op_name, args=tuple(operator_params))
)

return process_func(filter_operation_info, context, operator_params)
45 changes: 44 additions & 1 deletion graphql_compiler/compiler/metadata.py
Expand Up @@ -4,7 +4,7 @@

import six

from .helpers import Location
from .helpers import FoldScopeLocation, Location


LocationInfo = namedtuple(
Expand All @@ -24,6 +24,25 @@
)


# Describes one @filter directive applied in the query:
#   fields: tuple of field names the filter reads (usually one field;
#           the name_or_alias operator records both 'name' and 'alias')
#   op_name: the filter operator's name (e.g. '=', 'between', 'has_edge_degree')
#   args: tuple of the filter's arguments (e.g. '$my_param')
FilterInfo = namedtuple('FilterInfo', ['fields', 'op_name', 'args'])

# Describes one @recurse directive applied in the query:
#   edge_direction: direction of the recursed edge (e.g. 'out' or 'in')
#   edge_name: name of the edge being recursed
#   depth: the recursion depth
RecurseInfo = namedtuple('RecurseInfo', ['edge_direction', 'edge_name', 'depth'])


@six.python_2_unicode_compatible
class QueryMetadataTable(object):
"""Query metadata container with info on locations, inputs, outputs, and tags in the query."""
Expand All @@ -45,6 +64,9 @@ def __init__(self, root_location, root_location_info):
self._outputs = dict() # dict, output name -> output info namedtuple
self._tags = dict() # dict, tag name -> tag info namedtuple

self._filter_infos = dict() # Location -> FilterInfo array
self._recurse_infos = dict() # Location -> RecurseInfo array

# dict, revisiting Location -> revisit origin, i.e. the first Location with that query path
self._revisit_origins = dict()

Expand Down Expand Up @@ -132,6 +154,27 @@ def get_location_info(self, location):
u'{}'.format(location))
return location_info

def record_filter_info(self, location, filter_info):
"""Record filter information about the location."""
if isinstance(location, FoldScopeLocation):
# NOTE(gurer): ignore filters inside the fold for now
return
record_location = location.at_vertex()
self._filter_infos.setdefault(record_location, []).append(filter_info)

def get_filter_infos(self, location):
"""Get information about filters at the location."""
return self._filter_infos.get(location, [])

def record_recurse_info(self, location, recurse_info):
"""Record recursion information about the location."""
record_location = location.at_vertex()
self._recurse_infos.setdefault(record_location, []).append(recurse_info)

def get_recurse_infos(self, location):
"""Get information about recursions at the location."""
return self._recurse_infos.get(location, [])

def get_child_locations(self, location):
"""Yield an iterable of child locations for a given Location/FoldScopeLocation object."""
self.get_location_info(location) # purely to check for location validity
Expand Down
132 changes: 132 additions & 0 deletions graphql_compiler/tests/test_explain_info.py
@@ -0,0 +1,132 @@
# Copyright 2018-present Kensho Technologies, LLC.
import unittest

from . import test_input_data
from ..compiler.compiler_frontend import graphql_to_ir
from ..compiler.helpers import Location
from ..compiler.metadata import FilterInfo, RecurseInfo
from .test_helpers import get_schema


class ExplainInfoTests(unittest.TestCase):
    """Ensure we get correct information about filters and recursion."""

    def setUp(self):
        """Initialize the test schema once for all tests."""
        self.schema = get_schema()

    def check(self, graphql_test, expected_filters, expected_recurses):
        """Verify query produces expected explain infos."""
        ir_and_metadata = graphql_to_ir(self.schema, graphql_test().graphql_input)
        metadata = ir_and_metadata.query_metadata_table
        # Unfortunately literal dicts don't accept Location() as keys
        remaining_filters = dict(expected_filters)
        remaining_recurses = dict(expected_recurses)
        for location, _ in metadata.registered_locations:
            # Do the recorded filters match expectations for this location?
            actual_filters = metadata.get_filter_infos(location)
            self.assertEqual(remaining_filters.get(location, []), actual_filters)
            if actual_filters:
                del remaining_filters[location]
            # Do the recorded recursions match expectations for this location?
            actual_recurses = metadata.get_recurse_infos(location)
            self.assertEqual(remaining_recurses.get(location, []), actual_recurses)
            if actual_recurses:
                del remaining_recurses[location]
        # Every expected info must have been matched and consumed above.
        self.assertEqual(0, len(remaining_filters))
        self.assertEqual(0, len(remaining_recurses))

    def test_traverse_filter_and_output(self):
        location = Location(('Animal', 'out_Animal_ParentOf'), None, 1)
        expected_filters = [
            FilterInfo(fields=('name', 'alias'), op_name='name_or_alias', args=('$wanted',)),
        ]

        self.check(test_input_data.traverse_filter_and_output,
                   [(location, expected_filters)],
                   [])

    def test_complex_optional_traversal_variables(self):
        animal_location = Location(('Animal',), None, 1)
        animal_filters = [
            FilterInfo(fields=('name',), op_name='=', args=('$animal_name',)),
        ]

        fed_at_location = Location(('Animal', 'in_Animal_ParentOf', 'out_Animal_FedAt'), None, 1)
        fed_at_filters = [
            FilterInfo(fields=('name',), op_name='=', args=('%parent_fed_at_event',)),
            FilterInfo(fields=('event_date',),
                       op_name='between',
                       args=('%other_child_fed_at', '%parent_fed_at')),
        ]

        self.check(test_input_data.complex_optional_traversal_variables,
                   [(animal_location, animal_filters), (fed_at_location, fed_at_filters)],
                   [])

    def test_coercion_filters_and_multiple_outputs_within_fold_scope(self):
        # Filters inside a fold scope are currently not recorded, so no infos are expected.
        self.check(test_input_data.coercion_filters_and_multiple_outputs_within_fold_scope,
                   [],
                   [])

    def test_multiple_filters(self):
        location = Location(('Animal',), None, 1)
        expected_filters = [
            FilterInfo(fields=('name',), op_name='>=', args=('$lower_bound',)),
            FilterInfo(fields=('name',), op_name='<', args=('$upper_bound',))
        ]

        self.check(test_input_data.multiple_filters,
                   [(location, expected_filters)],
                   [])

    def test_has_edge_degree_op_filter(self):
        location = Location(('Animal',), None, 1)
        expected_filters = [
            FilterInfo(fields=('in_Animal_ParentOf',),
                       op_name='has_edge_degree',
                       args=('$child_count',))
        ]

        self.check(test_input_data.has_edge_degree_op_filter,
                   [(location, expected_filters)],
                   [])

    def test_simple_recurse(self):
        location = Location(('Animal',), None, 1)
        expected_recurses = [
            RecurseInfo(edge_direction='out', edge_name='Animal_ParentOf', depth=1)
        ]

        self.check(test_input_data.simple_recurse,
                   [],
                   [(location, expected_recurses)])

    def test_two_consecutive_recurses(self):
        location = Location(('Animal',), None, 1)
        expected_filters = [
            FilterInfo(fields=('name', 'alias'),
                       op_name='name_or_alias',
                       args=('$animal_name_or_alias',))
        ]
        expected_recurses = [
            RecurseInfo(edge_direction='out', edge_name='Animal_ParentOf', depth=2),
            RecurseInfo(edge_direction='in', edge_name='Animal_ParentOf', depth=2)
        ]

        self.check(test_input_data.two_consecutive_recurses,
                   [(location, expected_filters)],
                   [(location, expected_recurses)])

    def test_filter_on_optional_traversal_name_or_alias(self):
        location = Location(('Animal', 'out_Animal_ParentOf'), None, 1)
        expected_filters = [
            FilterInfo(fields=('name', 'alias'),
                       op_name='name_or_alias',
                       args=('%grandchild_name',))
        ]

        self.check(test_input_data.filter_on_optional_traversal_name_or_alias,
                   [(location, expected_filters)],
                   [])

0 comments on commit 4a97610

Please sign in to comment.