Skip to content

Commit

Permalink
♻️(search) build ES requests in an Indexer method
Browse files Browse the repository at this point in the history
We want to enable downstream developers to modify/extend/replace
search behavior as much as possible without specifying complex
interfaces between their code and ours.
In this case, the simplest way to let them customize ElasticSearch
queries is to delegate request building to the indexer so it can
be overridden to change the query behavior.
  • Loading branch information
mbenadda committed Jul 19, 2018
1 parent a7ac327 commit 1696320
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 214 deletions.
10 changes: 8 additions & 2 deletions src/richie/apps/search/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@
"""


class ApiConsumingException(Exception):
"""
Exception raised when an exception occurs during API walk using our api_consumption util
"""


class IndexerDataException(Exception):
"""
Exception raised when an Indexer fails to provide the data required for ES indexing
"""


class ApiConsumingException(Exception):
class QueryFormatException(Exception):
"""
Exception raised when an exception occurs during API walk using our api_consumption util
Exception raised when the query parameters for a GET list call are invalid
"""
40 changes: 39 additions & 1 deletion src/richie/apps/search/indexers/organizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""
from django.conf import settings

from ..exceptions import IndexerDataException
from ..exceptions import IndexerDataException, QueryFormatException
from ..forms import OrganizationListForm
from ..partial_mappings import MULTILINGUAL_TEXT
from ..utils.api_consumption import walk_api_json_list
from ..utils.i18n import get_best_field_language
Expand Down Expand Up @@ -66,3 +67,40 @@ def format_es_organization_for_api(es_organization, best_language):
es_organization["_source"]["name"], best_language
),
}

@staticmethod
def build_es_query(request):
"""
Build an ElasticSearch query and its related aggregations, to be consumed by the ES client
in the Organizations ViewSet
"""
# Instantiate a form with the request's query_params to check & sanitize them
query_params_form = OrganizationListForm(request.query_params)

# Raise an exception with error information if the query params are not valid
if not query_params_form.is_valid():
raise QueryFormatException(query_params_form.errors)

# Build a query that matches on the organization name field if it was passed by the client
# Note: test_elasticsearch_feature.py needs to be updated whenever the search call
# is updated and makes use new features.
if query_params_form.cleaned_data.get("query"):
query = {
"query": {
"match": {
"name.fr": {
"query": query_params_form.cleaned_data.get("query"),
"analyzer": "french",
}
}
}
}
# Build a match_all query by default
else:
query = {"query": {"match_all": {}}}

return (
query_params_form.cleaned_data.get("limit"),
query_params_form.cleaned_data.get("offset") or 0,
query,
)
40 changes: 39 additions & 1 deletion src/richie/apps/search/indexers/subjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""
from django.conf import settings

from ..exceptions import IndexerDataException
from ..exceptions import IndexerDataException, QueryFormatException
from ..forms import SubjectListForm
from ..partial_mappings import MULTILINGUAL_TEXT
from ..utils.api_consumption import walk_api_json_list
from ..utils.i18n import get_best_field_language
Expand Down Expand Up @@ -56,3 +57,40 @@ def format_es_subject_for_api(es_subject, best_language):
es_subject["_source"]["name"], best_language
),
}

@staticmethod
def build_es_query(request):
"""
Build an ElasticSearch query and its related aggregations, to be consumed by the ES client
in the Subjects ViewSet
"""
# Instantiate a form with our query_params to check & sanitize them
params_form = SubjectListForm(request.query_params)

# Raise an exception with error information if the query params are not valid
if not params_form.is_valid():
raise QueryFormatException(params_form.errors)

# Build a query that matches on the name field if it was handed by the client
# Note: test_elasticsearch_feature.py needs to be updated whenever the search call
# is updated and makes use new features.
if params_form.cleaned_data.get("query"):
query = {
"query": {
"match": {
"name.fr": {
"query": params_form.cleaned_data.get("query"),
"analyzer": "french",
}
}
}
}
# Build a match_all query by default
else:
query = {"query": {"match_all": {}}}

return (
params_form.cleaned_data.get("limit"),
params_form.cleaned_data.get("offset") or 0,
query,
)
50 changes: 15 additions & 35 deletions src/richie/apps/search/viewsets/organizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from rest_framework.response import Response
from rest_framework.viewsets import ViewSet

from ..forms import OrganizationListForm
from ..exceptions import QueryFormatException
from ..indexers.organizations import OrganizationsIndexer


Expand All @@ -24,57 +24,37 @@ def list(self, request, version):
Organization search endpoint: pass query params to ElasticSearch so it filters
organizations and returns a list of matching items
"""
# Instantiate a form with our query_params to check & sanitize them
query_params_form = OrganizationListForm(request.query_params)

# Return a 400 with error information if the query params are not as expected
if not query_params_form.is_valid():
return Response(status=400, data={"errors": query_params_form.errors})

# Build a query that matches on the organization name field if it was passed by the client
# Note: test_elasticsearch_feature.py needs to be updated whenever the search call
# is updated and makes use new features.
if query_params_form.cleaned_data.get("query"):
search_payload = {
"query": {
"match": {
"name.fr": {
"query": query_params_form.cleaned_data.get("query"),
"analyzer": "french",
}
}
}
}
# Build a match_all query by default
else:
search_payload = {"query": {"match_all": {}}}
try:
limit, offset, query = OrganizationsIndexer.build_es_query(request)
except QueryFormatException as exc:
# Return a 400 with error information if the query params are not as expected
return Response(status=400, data={"errors": exc.args[0]})

query_response = settings.ES_CLIENT.search(
search_query_response = settings.ES_CLIENT.search(
index=OrganizationsIndexer.index_name,
doc_type=OrganizationsIndexer.document_type,
body=search_payload,
body=query,
# Directly pass meta-params through as arguments to the ES client
from_=query_params_form.cleaned_data.get("offset") or 0,
size=query_params_form.cleaned_data.get("limit")
or settings.ES_DEFAULT_PAGE_SIZE,
from_=offset,
size=limit or settings.ES_DEFAULT_PAGE_SIZE,
)

# Format the response in a consumer-friendly way
# NB: if there are 0 hits the query_response is formatted the exact same way, only the
# NB: if there are 0 hits the query response is formatted the exact same way, only the
# .hits.hits array is empty.
response_object = {
"meta": {
"count": len(query_response["hits"]["hits"]),
"offset": query_params_form.cleaned_data.get("offset") or 0,
"total_count": query_response["hits"]["total"],
"count": len(search_query_response["hits"]["hits"]),
"offset": offset,
"total_count": search_query_response["hits"]["total"],
},
"objects": [
OrganizationsIndexer.format_es_organization_for_api(
organization,
# Get the best language to return multilingual fields
get_language_from_request(request),
)
for organization in query_response["hits"]["hits"]
for organization in search_query_response["hits"]["hits"]
],
}

Expand Down
39 changes: 10 additions & 29 deletions src/richie/apps/search/viewsets/subjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from rest_framework.response import Response
from rest_framework.viewsets import ViewSet

from ..forms import SubjectListForm
from ..exceptions import QueryFormatException
from ..indexers.subjects import SubjectsIndexer


Expand All @@ -24,38 +24,19 @@ def list(self, request, version):
Subject search endpoint: pass query params to ElasticSearch so it filters subjects
and returns a list of matching items
"""
# Instantiate a form with our query_params to check & sanitize them
params_form = SubjectListForm(request.query_params)

# Return a 400 with error information if the query params are not as expected
if not params_form.is_valid():
return Response(status=400, data={"errors": params_form.errors})

# Build a query that matches on the name field if it was handed by the client
# Note: test_elasticsearch_feature.py needs to be updated whenever the search call
# is updated and makes use new features.
if params_form.cleaned_data.get("query"):
search_body = {
"query": {
"match": {
"name.fr": {
"query": params_form.cleaned_data.get("query"),
"analyzer": "french",
}
}
}
}
# Build a match_all query by default
else:
search_body = {"query": {"match_all": {}}}
try:
limit, offset, query = SubjectsIndexer.build_es_query(request)
except QueryFormatException as exc:
# Return a 400 with error information if the query params are not as expected
return Response(status=400, data={"errors": exc.args[0]})

query_response = settings.ES_CLIENT.search(
index=SubjectsIndexer.index_name,
doc_type=SubjectsIndexer.document_type,
body=search_body,
body=query,
# Directly pass meta-params through as arguments to the ES client
from_=params_form.cleaned_data.get("offset") or 0,
size=params_form.cleaned_data.get("limit") or settings.ES_DEFAULT_PAGE_SIZE,
from_=offset,
size=limit or settings.ES_DEFAULT_PAGE_SIZE,
)

# Format the response in a consumer-friendly way
Expand All @@ -64,7 +45,7 @@ def list(self, request, version):
response_object = {
"meta": {
"count": len(query_response["hits"]["hits"]),
"offset": params_form.cleaned_data.get("offset") or 0,
"offset": offset,
"total_count": query_response["hits"]["total"],
},
"objects": [
Expand Down
48 changes: 47 additions & 1 deletion tests/apps/search/test_indexers_organizations.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""
Tests for the organization indexer
"""
from types import SimpleNamespace

from django.conf import settings
from django.test import TestCase

import responses

from richie.apps.search.exceptions import IndexerDataException
from richie.apps.search.exceptions import IndexerDataException, QueryFormatException
from richie.apps.search.indexers.organizations import OrganizationsIndexer


Expand Down Expand Up @@ -141,3 +143,47 @@ def test_format_es_organization_for_api(self):
"name": "University of Paris XIII",
},
)

def test_build_es_query_search_all_organizations(self):
"""
Happy path: the expected ES query object is returned
"""
request = SimpleNamespace(query_params={"limit": 11, "offset": 4})
self.assertEqual(
OrganizationsIndexer.build_es_query(request),
(11, 4, {"query": {"match_all": {}}}),
)

def test_build_es_query_search_by_name(self):
"""
Happy path: the expected ES query object is returned
"""
request = SimpleNamespace(
query_params={"limit": 12, "offset": 3, "query": "user entered some text"}
)
self.assertEqual(
OrganizationsIndexer.build_es_query(request),
(
12,
3,
{
"query": {
"match": {
"name.fr": {
"query": "user entered some text",
"analyzer": "french",
}
}
}
},
),
)

def test_build_es_query_with_invalid_params(self):
"""
Error case: the request contained invalid parameters
"""
with self.assertRaises(QueryFormatException):
OrganizationsIndexer.build_es_query(
SimpleNamespace(query_params={"limit": "invalid input"})
)
Loading

0 comments on commit 1696320

Please sign in to comment.