Skip to content

Commit

Permalink
Merge branch 'bulk_embedded_documents_resolution_#1031'
Browse files Browse the repository at this point in the history
Closes #1029.
  • Loading branch information
nicolaiarocci committed Jun 21, 2017
2 parents d5ec458 + 4eeb9fc commit dfb2225
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 44 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Expand Up @@ -9,6 +9,7 @@ Development Lead
Patches and Contributions
`````````````````````````
- Alexander Hendorf
- Amedeo91
- Andreas Røssland
- Andrés Martano
- Antonio Lourenco
Expand Down
2 changes: 2 additions & 0 deletions CHANGES
Expand Up @@ -8,6 +8,8 @@ Development

Version 0.8
~~~~~~~~~~~
- Performance improved on retrieving a list of embedded documents. Closes
#1029 (Amedeo91).
- New: ``JSON_REQUEST_CONTENT_TYPES`` or supported JSON content types. Useful
when you need support for vendor-specific json types. Please note: responses
will still carry the standard ``application/json`` type. Defaults to
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Expand Up @@ -13,4 +13,4 @@ Sphinx==1.2.3
tox==2.4.1
wheel==0.24.0
testfixtures==4.1.2
alabaster==0.7.10
alabaster==0.7.10
18 changes: 9 additions & 9 deletions eve/io/mongo/mongo.py
Expand Up @@ -178,10 +178,10 @@ def find(self, resource, req, sub_resource_lookup):
"""
args = dict()

if req.max_results:
if req and req.max_results:
args['limit'] = req.max_results

if req.page > 1:
if req and req.page > 1:
args['skip'] = (req.page - 1) * req.max_results

# TODO sort syntax should probably be coherent with 'where': either
Expand All @@ -194,7 +194,7 @@ def find(self, resource, req, sub_resource_lookup):
client_sort = {}
spec = {}

if req.sort:
if req and req.sort:
try:
# assume it's mongo syntax (ie. ?sort=[("name", 1)])
client_sort = ast.literal_eval(req.sort)
Expand All @@ -213,7 +213,7 @@ def find(self, resource, req, sub_resource_lookup):
self.app.logger.exception(e)
abort(400, description=debug_error_message(str(e)))

if req.where:
if req and req.where:
try:
spec = self._sanitize(json.loads(req.where))
except HTTPException as e:
Expand All @@ -235,13 +235,13 @@ def find(self, resource, req, sub_resource_lookup):
if sub_resource_lookup:
spec = self.combine_queries(spec, sub_resource_lookup)

if config.DOMAIN[resource]['soft_delete'] and not req.show_deleted:
if config.DOMAIN[resource]['soft_delete'] \
and not (req and req.show_deleted) \
and not self.query_contains_field(spec, config.DELETED):
# Soft delete filtering applied after validate_filters call as
# querying against the DELETED field must always be allowed when
# soft_delete is enabled
if not self.query_contains_field(spec, config.DELETED):
spec = self.combine_queries(
spec, {config.DELETED: {"$ne": True}})
spec = self.combine_queries(spec, {config.DELETED: {"$ne": True}})

spec = self._mongotize(spec, resource)

Expand All @@ -253,7 +253,7 @@ def find(self, resource, req, sub_resource_lookup):
client_projection,
client_sort)

if req.if_modified_since:
if req and req.if_modified_since:
spec[config.LAST_UPDATED] = \
{'$gt': req.if_modified_since}

Expand Down
184 changes: 150 additions & 34 deletions eve/methods/common.py
Expand Up @@ -10,6 +10,10 @@
:license: BSD, see LICENSE for more details.
"""
import base64
try:
from collections import Counter
except:
from backport_collections import Counter
import simplejson as json
import time

Expand Down Expand Up @@ -673,7 +677,7 @@ def resolve_embedded_fields(resource, req):
return enabled_embedded_fields


def embedded_document(reference, data_relation, field_name):
def embedded_document(references, data_relation, field_name):
""" Returns a document to be embedded by reference using data_relation
taking into account document versions
Expand All @@ -683,40 +687,156 @@ def embedded_document(reference, data_relation, field_name):
.. versionadded:: 0.5
"""
embedded_docs = []

output_is_list = True

if not isinstance(references, list):
output_is_list = False
references = [references]

# Retrieve and serialize the requested document
if 'version' in data_relation and data_relation['version'] is True:
# grab the specific version
embedded_doc = get_data_version_relation_document(
data_relation, reference)

# grab the latest version
latest_embedded_doc = get_data_version_relation_document(
data_relation, reference, latest=True)

# make sure we got the documents
if embedded_doc is None or latest_embedded_doc is None:
# your database is not consistent!!! that is bad
# TODO: we should notify the developers with a log.
abort(404, description=debug_error_message(
"Unable to locate embedded documents for '%s'" %
field_name
))

build_response_document(embedded_doc, data_relation['resource'],
[], latest_embedded_doc)
# For the version flow, I keep the as-is logic (flow is too complex to make it bulk)
for reference in references:
# grab the specific version
embedded_doc = get_data_version_relation_document(
data_relation, reference)

# grab the latest version
latest_embedded_doc = get_data_version_relation_document(
data_relation, reference, latest=True)

# make sure we got the documents
if embedded_doc is None or latest_embedded_doc is None:
# your database is not consistent!!! that is bad
# TODO: we should notify the developers with a log.
abort(404, description=debug_error_message(
"Unable to locate embedded documents for '%s'" %
field_name
))

build_response_document(embedded_doc, data_relation['resource'],
[], latest_embedded_doc)
embedded_docs.append(embedded_doc)
else:
id_value_to_sort, list_of_id_field_name, subresources_query = generate_query_and_sorting_criteria(data_relation,
references)
for subresource in subresources_query:
list_embedded_doc = list(app.data.find(subresource,
None,
subresources_query[subresource]))
if not list_embedded_doc:
embedded_docs.extend([None] *
len(subresources_query[subresource]["$or"]))
else:
for embedded_doc in list_embedded_doc:
resolve_media_files(embedded_doc, subresource)
embedded_docs.extend(list_embedded_doc)

# After having retrieved my data, I have to be sure that the sorting of the
# list is the same in input as in output (this is to support embedding of
# sub-documents - only in case the storage is not done via DBref)
if embedded_docs:
embedded_docs = sort_db_response(embedded_docs, id_value_to_sort, list_of_id_field_name)

if output_is_list:
return embedded_docs
elif embedded_docs:
return embedded_docs[0]
else:
return None


def sort_db_response(embedded_docs, id_value_to_sort, list_of_id_field_name):
""" Sorts the documents fetched from the database
:param embedded_docs: the documents fetch from the database.
:param id_value_to_sort: id_value sort criteria.
:param list_of_id_field_name: list of name of fields
:return embedded_docs: the list of documents sorted as per input
"""

id_field_name_occurrences = Counter(list_of_id_field_name)
temp_embedded_docs = []
old_occurrence = 0

for id_field_name in set(list_of_id_field_name):
current_occurrence = old_occurrence + int(id_field_name_occurrences[id_field_name])
temp_embedded_docs.extend(
sort_per_resource(embedded_docs[old_occurrence:current_occurrence],
id_value_to_sort,
id_field_name))
old_occurrence = current_occurrence

return temp_embedded_docs


def sort_per_resource(embedded_docs, id_value_to_sort, id_field_name):
""" Sorts the documents fetched from the database per single resource
:param embedded_docs: the documents fetch from the database.
:param id_value_to_sort: id_value sort criteria.
:param list_of_id_field_name: list of name of fields
:return embedded_docs: the list of documents sorted as per input
"""
# Removing None
number_of_none = embedded_docs.count(None)
if number_of_none:
embedded_docs = [x for x in embedded_docs if x is not None]
id2dict = dict((d[id_field_name], d) for d in embedded_docs)
temporary_embedded_docs = []
if number_of_none:
for id_value_ in id_value_to_sort:
if id_value_ in id2dict:
temporary_embedded_docs.append(id2dict[id_value_])
else:
temporary_embedded_docs.append(None)
return embedded_docs


def generate_query_and_sorting_criteria(data_relation, references):
""" Generate query and sorting critiria
:param data_relation: data relation for the resource.
:param references: DBRef or id to use to embed the document.
:returns id_value_to_sort: list of ids to use in the sort
list_of_id_field_name: list of field name (important only for DBRef)
subresources_query: the list of query to perform per resource
(in case is not DBRef, it will be only one query)
"""
query = {"$or": []}
subresources_query = {}
old_subresource = ""
id_value_to_sort = []
# id_field name should be the same for
# all the elements in the list
list_of_id_field_name = []
for counter, reference in enumerate(references):
# if reference is DBRef take the referenced collection as subresource
# NOTE: using DBRef, I can define several resource for each link
subresource = reference.collection if isinstance(reference, DBRef) \
else data_relation['resource']
id_field = config.DOMAIN[subresource]['id_field']
embedded_doc = app.data.find_one(subresource, None,
**{id_field: reference.id
if isinstance(reference, DBRef)
else reference})
if embedded_doc:
resolve_media_files(embedded_doc, subresource)

return embedded_doc
if old_subresource and old_subresource != subresource:
add_query_to_list(query, subresource, subresources_query)
# NOTE: in case it is a DBRef link, the id_field_name is always the _id
# regardless the Eve set-up
id_field_name = "_id" if isinstance(reference, DBRef) \
else config.DOMAIN[subresource]['id_field']
id_field_value = reference.id \
if isinstance(reference, DBRef) else reference
query["$or"].append({id_field_name: id_field_value})
id_value_to_sort.append(id_field_value)
list_of_id_field_name.append(id_field_name)
if counter == len(references) - 1:
add_query_to_list(query, subresource, subresources_query)
return id_value_to_sort, list_of_id_field_name, subresources_query


def add_query_to_list(query, subresource, subresource_query):
subresource_query.update({subresource: copy(query)})
query.clear()
query["$or"] = []


def subdocuments(fields_chain, resource, document):
Expand Down Expand Up @@ -788,11 +908,7 @@ def resolve_embedded_documents(document, resource, embedded_fields):
for subdocument in subdocuments(fields_chain[:-1], resource, document):
if last_field not in subdocument:
continue
if isinstance(subdocument[last_field], list):
subdocument[last_field] = list(map(getter,
subdocument[last_field]))
else:
subdocument[last_field] = getter(subdocument[last_field])
subdocument[last_field] = getter(subdocument[last_field])


def resolve_media_files(document, resource):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -7,3 +7,4 @@ MarkupSafe==0.23
pymongo==3.4.0
simplejson==3.8.2
Werkzeug==0.11.15
backport_collections==0.1
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -15,6 +15,7 @@
'itsdangerous>=0.24,<1.0',
'flask>=0.10.1,<=0.12',
'pymongo>=3.4',
'backport_collections>=0.1',
]

try:
Expand Down

0 comments on commit dfb2225

Please sign in to comment.