Skip to content

Commit

Permalink
added object field support to elasticsearch query builder. Consequent…
Browse files Browse the repository at this point in the history
…ly updated the nestedfield checker. Rework elasticsearch query builder code to deport more logic in visitor.py
  • Loading branch information
alexgarel committed Sep 7, 2017
1 parent dabd965 commit 8738b65
Show file tree
Hide file tree
Showing 10 changed files with 837 additions and 464 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,21 @@ and this project tries to adhere to [Semantic Versioning](http://semver.org/spec
Unreleased
============

Added
-----

- Manage object fields in elasticsearch transformation

Fixed
-----

- minor fix, getting better error message when parsing error is at the end of content

Changed
--------

- better handling of nested fields may lead to shorter requests

0.5.3 - 2017-08-21
==================

Expand Down
19 changes: 11 additions & 8 deletions docs/source/quick_start.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,14 @@ We may also pass default operator, and default fields::
... {'range': {'published': {'lte': '1990-01-01T00:00:00.000Z'}}},
... {'term': {'tag': {'value': 'fable'}}}]}})

You may also use nested fields::
You may also use nested fields or object fields::

>>> es_builder = ElasticsearchQueryBuilder(
... nested_fields={"author": {"given_name", "last_name"}})
... nested_fields={"authors": {"given_name", "last_name", "city"}},
... object_fields=["authors.city.name"])
>>> tree = parser.parse('''
... title:"quick brown fox" AND
... author:(given_name:Ja* AND last_name:London)
... authors:(given_name:Ja* AND last_name:London AND city.name:"San Francisco")
... ''')
>>> query = es_builder(tree)
>>> t.assertDictEqual(
Expand All @@ -130,18 +131,20 @@ You may also use nested fields::
... {'nested': {
... 'query': {'bool': {'must': [
... {'query_string': {
... 'default_field': 'author.given_name',
... 'default_field': 'authors.given_name',
... 'analyze_wildcard': True,
... 'query': 'Ja*',
... 'allow_leading_wildcard': True}},
... {'match': {'author.last_name': {
... {'match': {'authors.last_name': {
... 'query': 'London',
... 'type': 'phrase',
... 'zero_terms_query': 'all'}}}]}},
... 'path': 'author'}}]}})
... 'zero_terms_query': 'all'}}},
... {'match_phrase': {'authors.city.name': {
... 'query': 'San Francisco'}}}]}},
... 'path': 'authors'}}]}})

You can use this JSON directly with `elasticsearch python bindings`_,
but also use it to build query with `elasticsearch_dsl`_.
but also use it to build a query with `elasticsearch_dsl`_.

.. note::
The list of terms fields could, of course,
Expand Down
2 changes: 1 addition & 1 deletion luqum/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

__version__ = '0.5.3'
__version__ = '0.6.0-alpha'
__version_info__ = tuple(__version__.split('.'))
84 changes: 46 additions & 38 deletions luqum/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import re

from . import tree
from .exceptions import NestedSearchFieldException
from .utils import LuceneTreeVisitorV2, normalize_nested_fields_specs
from .exceptions import NestedSearchFieldException, ObjectSearchFieldException
from .utils import LuceneTreeVisitorV2
from .utils import flatten_nested_fields_specs, normalize_object_fields_specs


def camel_to_lower(name):
Expand Down Expand Up @@ -146,11 +147,17 @@ class CheckNestedFields(LuceneTreeVisitorV2):
:param nested_fields: a dict where keys are name of nested fields,
values are dict of sub-nested fields or an empty dict for leaf
:param object_fields:
this is either None, in which case unknown object fields will be accepted,
or a dict of sub-nested fields (like nested_fields)
"""

def __init__(self, nested_fields):
def __init__(self, nested_fields, object_fields=None):
assert(isinstance(nested_fields, dict))
self.nested_fields = normalize_nested_fields_specs(nested_fields)
self.object_fields = normalize_object_fields_specs(object_fields)
self.object_prefixes = set(k.rsplit(".", 1)[0] for k in self.object_fields or [])
self.nested_fields = flatten_nested_fields_specs(nested_fields)
self.nested_prefixes = set(k.rsplit(".", 1)[0] for k in self.nested_fields)

def generic_visit(self, node, parents, context):
"""
Expand All @@ -159,50 +166,51 @@ def generic_visit(self, node, parents, context):
for child in node.children:
self.visit(child, parents + [node], context)

def _recurse_nested_fields(self, node, context, parents):
names = node.name.split(".")
nested_fields = context["nested_fields"]
current_field = context["current_field"]
for name in names:
if name in nested_fields:
# recurse
nested_fields = nested_fields[name]
current_field = name
elif current_field is not None: # we are inside another field
if nested_fields:
# calling an unknown field inside a nested one
raise NestedSearchFieldException(
'"{sub}" is not a subfield of "{field}" in "{expr}"'
.format(sub=name, field=current_field, expr=str(parents[-1])))
else:
# calling a field inside a non nested
raise NestedSearchFieldException(
'''"{sub}" can't be nested in "{field}" in "{expr}"'''
.format(sub=name, field=current_field, expr=str(parents[-1])))
else:
# not a nested field, so no nesting any more
nested_fields = {}
current_field = name
return {"nested_fields": nested_fields, "current_field": current_field}

def visit_search_field(self, node, parents, context):
"""
On search field node, check nested fields logic
"""
context = dict(context) # copy
context.update(self._recurse_nested_fields(node, context, parents))
child_context = dict(context) # copy
child_context["prefix"] = context["prefix"] + node.name.split(".")
for child in node.children:
self.visit(child, parents + [node], context)
self.visit(child, parents + [node], child_context)

def _check_final_operation(self, node, context):
"""
Verify that a final expression (term or phrase) sits on a valid field.

:param node: the final node being checked, only used (through ``str``)
to build error messages
:param dict context: visiting context, its ``"prefix"`` entry is the list
of field name parts leading to this node
:raises NestedSearchFieldException: if the dotted field name is one of
``self.nested_prefixes`` or ``self.object_prefixes``
:raises ObjectSearchFieldException: if the name has more than one part,
``self.object_fields`` is not None, and the name is neither
in ``self.object_fields`` nor in ``self.nested_fields``
"""
prefix = context["prefix"]
# an empty prefix means the expression is not inside any search field: nothing to check
if prefix:
fullname = ".".join(prefix)
if fullname in self.nested_prefixes:
# the name is a container of nested sub fields, not a final field
raise NestedSearchFieldException(
('''"{expr}" can't be directly attributed to "{field}"''' +
''' as it is a nested field''')
.format(expr=str(node), field=fullname))
elif fullname in self.object_prefixes:
# NOTE(review): this branch is about object fields but raises
# NestedSearchFieldException, whereas ObjectSearchFieldException is used below
# -- confirm this is intended
raise NestedSearchFieldException(
('''"{expr}" can't be directly attributed to "{field}"''' +
''' as it is an object field''')
.format(expr=str(node), field=fullname))
elif len(prefix) > 1:
# dotted name: when object_fields is None no check is done (any name is accepted),
# else the name must be a declared object field or nested field
unknown_field = (
self.object_fields is not None and
fullname not in self.object_fields and
fullname not in self.nested_fields)
if unknown_field:
raise ObjectSearchFieldException(
'''"{expr}" attributed to unknown nested or object field "{field}"'''
.format(expr=str(node), field=fullname))

def visit_phrase(self, node, parents, context):
"""
On phrase node, verify the phrase is in a final search field

The check itself is delegated to ``_check_final_operation``,
``parents`` is not used.
"""
self._check_final_operation(node, context)

def visit_term(self, node, parents, context):
"""
On term field, verify term is in a final search field
"""
if context["nested_fields"] and context["current_field"]:
raise NestedSearchFieldException(
'''"{expr}" can't be directly attributed to "{field}" as it is a nested field'''
.format(expr=str(node), field=context["current_field"]))
self._check_final_operation(node, context)

def __call__(self, tree):
context = {"nested_fields": self.nested_fields, "current_field": None}
context = {"prefix": []}
return self.visit(tree, context=context)
Loading

0 comments on commit 8738b65

Please sign in to comment.