diff --git a/AUTHORS b/AUTHORS index d1269ac75..b4a313731 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,6 +9,7 @@ Development Lead Patches and Contributions ````````````````````````` - Alexander Hendorf +- Amedeo91 - Andreas Røssland - Andrés Martano - Antonio Lourenco diff --git a/CHANGES b/CHANGES index 5c08ba992..a05f60424 100644 --- a/CHANGES +++ b/CHANGES @@ -8,6 +8,8 @@ Development Version 0.8 ~~~~~~~~~~~ +- Performance improved on retrieving a list of embedded documents. Closes + #1029 (Amedeo91). - New: ``JSON_REQUEST_CONTENT_TYPES`` or supported JSON content types. Useful when you need support for vendor-specific json types. Please note: responses will still carry the standard ``application/json`` type. Defaults to diff --git a/dev-requirements.txt b/dev-requirements.txt index 0f1bef02d..6ed6c6f89 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -13,4 +13,4 @@ Sphinx==1.2.3 tox==2.4.1 wheel==0.24.0 testfixtures==4.1.2 -alabaster==0.7.10 +alabaster==0.7.10 \ No newline at end of file diff --git a/eve/io/mongo/mongo.py b/eve/io/mongo/mongo.py index e196b0b6e..0ad0090a2 100644 --- a/eve/io/mongo/mongo.py +++ b/eve/io/mongo/mongo.py @@ -178,10 +178,10 @@ def find(self, resource, req, sub_resource_lookup): """ args = dict() - if req.max_results: + if req and req.max_results: args['limit'] = req.max_results - if req.page > 1: + if req and req.page > 1: args['skip'] = (req.page - 1) * req.max_results # TODO sort syntax should probably be coherent with 'where': either @@ -194,7 +194,7 @@ def find(self, resource, req, sub_resource_lookup): client_sort = {} spec = {} - if req.sort: + if req and req.sort: try: # assume it's mongo syntax (ie. ?sort=[("name", 1)]) client_sort = ast.literal_eval(req.sort) @@ -213,7 +213,7 @@ def find(self, resource, req, sub_resource_lookup): self.app.logger.exception(e) abort(400, description=debug_error_message(str(e))) - if req.where: + if req and req.where: try: spec = self._sanitize(json.loads(req.where)) except HTTPException as e: @@ -235,13 +235,13 @@ def find(self, resource, req, sub_resource_lookup): if sub_resource_lookup: spec = self.combine_queries(spec, sub_resource_lookup) - if config.DOMAIN[resource]['soft_delete'] and not req.show_deleted: + if config.DOMAIN[resource]['soft_delete'] \ + and not (req and req.show_deleted) \ + and not self.query_contains_field(spec, config.DELETED): # Soft delete filtering applied after validate_filters call as # querying against the DELETED field must always be allowed when # soft_delete is enabled - if not self.query_contains_field(spec, config.DELETED): - spec = self.combine_queries( - spec, {config.DELETED: {"$ne": True}}) + spec = self.combine_queries(spec, {config.DELETED: {"$ne": True}}) spec = self._mongotize(spec, resource) @@ -253,7 +253,7 @@ def find(self, resource, req, sub_resource_lookup): client_projection, client_sort) - if req.if_modified_since: + if req and req.if_modified_since: spec[config.LAST_UPDATED] = \ {'$gt': req.if_modified_since} diff --git a/eve/methods/common.py b/eve/methods/common.py index 43006600d..b22d05bb8 100644 --- a/eve/methods/common.py +++ b/eve/methods/common.py @@ -10,6 +10,10 @@ :license: BSD, see LICENSE for more details. """ import base64 +try: + from collections import Counter +except: + from backport_collections import Counter import simplejson as json import time @@ -673,7 +677,7 @@ def resolve_embedded_fields(resource, req): return enabled_embedded_fields -def embedded_document(reference, data_relation, field_name): +def embedded_document(references, data_relation, field_name): """ Returns a document to be embedded by reference using data_relation taking into account document versions @@ -683,40 +687,156 @@ def embedded_document(reference, data_relation, field_name): .. versionadded:: 0.5 """ + embedded_docs = [] + + output_is_list = True + + if not isinstance(references, list): + output_is_list = False + references = [references] + # Retrieve and serialize the requested document if 'version' in data_relation and data_relation['version'] is True: - # grab the specific version - embedded_doc = get_data_version_relation_document( - data_relation, reference) - - # grab the latest version - latest_embedded_doc = get_data_version_relation_document( - data_relation, reference, latest=True) - - # make sure we got the documents - if embedded_doc is None or latest_embedded_doc is None: - # your database is not consistent!!! that is bad - # TODO: we should notify the developers with a log. - abort(404, description=debug_error_message( - "Unable to locate embedded documents for '%s'" % - field_name - )) - - build_response_document(embedded_doc, data_relation['resource'], - [], latest_embedded_doc) + # For the version flow, I keep the as-is logic (flow is too complex to make it bulk) + for reference in references: + # grab the specific version + embedded_doc = get_data_version_relation_document( + data_relation, reference) + + # grab the latest version + latest_embedded_doc = get_data_version_relation_document( + data_relation, reference, latest=True) + + # make sure we got the documents + if embedded_doc is None or latest_embedded_doc is None: + # your database is not consistent!!! that is bad + # TODO: we should notify the developers with a log. + abort(404, description=debug_error_message( + "Unable to locate embedded documents for '%s'" % + field_name + )) + + build_response_document(embedded_doc, data_relation['resource'], + [], latest_embedded_doc) + embedded_docs.append(embedded_doc) else: + id_value_to_sort, list_of_id_field_name, subresources_query = generate_query_and_sorting_criteria(data_relation, + references) + for subresource in subresources_query: + list_embedded_doc = list(app.data.find(subresource, + None, + subresources_query[subresource])) + if not list_embedded_doc: + embedded_docs.extend([None] * + len(subresources_query[subresource]["$or"])) + else: + for embedded_doc in list_embedded_doc: + resolve_media_files(embedded_doc, subresource) + embedded_docs.extend(list_embedded_doc) + + # After having retrieved my data, I have to be sure that the sorting of the + # list is the same in input as in output (this is to support embedding of + # sub-documents - only in case the storage is not done via DBref) + if embedded_docs: + embedded_docs = sort_db_response(embedded_docs, id_value_to_sort, list_of_id_field_name) + + if output_is_list: + return embedded_docs + elif embedded_docs: + return embedded_docs[0] + else: + return None + + +def sort_db_response(embedded_docs, id_value_to_sort, list_of_id_field_name): + """ Sorts the documents fetched from the database + + :param embedded_docs: the documents fetch from the database. + :param id_value_to_sort: id_value sort criteria. + :param list_of_id_field_name: list of name of fields + :return embedded_docs: the list of documents sorted as per input + """ + + id_field_name_occurrences = Counter(list_of_id_field_name) + temp_embedded_docs = [] + old_occurrence = 0 + + for id_field_name in set(list_of_id_field_name): + current_occurrence = old_occurrence + int(id_field_name_occurrences[id_field_name]) + temp_embedded_docs.extend( + sort_per_resource(embedded_docs[old_occurrence:current_occurrence], + id_value_to_sort, + id_field_name)) + old_occurrence = current_occurrence + + return temp_embedded_docs + + +def sort_per_resource(embedded_docs, id_value_to_sort, id_field_name): + """ Sorts the documents fetched from the database per single resource + + :param embedded_docs: the documents fetch from the database. + :param id_value_to_sort: id_value sort criteria. + :param list_of_id_field_name: list of name of fields + :return embedded_docs: the list of documents sorted as per input + """ + # Removing None + number_of_none = embedded_docs.count(None) + if number_of_none: + embedded_docs = [x for x in embedded_docs if x is not None] + id2dict = dict((d[id_field_name], d) for d in embedded_docs) + temporary_embedded_docs = [] + if number_of_none: + for id_value_ in id_value_to_sort: + if id_value_ in id2dict: + temporary_embedded_docs.append(id2dict[id_value_]) + else: + temporary_embedded_docs.append(None) + return embedded_docs + + +def generate_query_and_sorting_criteria(data_relation, references): + """ Generate query and sorting critiria + + :param data_relation: data relation for the resource. + :param references: DBRef or id to use to embed the document. + :returns id_value_to_sort: list of ids to use in the sort + list_of_id_field_name: list of field name (important only for DBRef) + subresources_query: the list of query to perform per resource + (in case is not DBRef, it will be only one query) + """ + query = {"$or": []} + subresources_query = {} + old_subresource = "" + id_value_to_sort = [] + # id_field name should be the same for + # all the elements in the list + list_of_id_field_name = [] + for counter, reference in enumerate(references): # if reference is DBRef take the referenced collection as subresource + # NOTE: using DBRef, I can define several resource for each link subresource = reference.collection if isinstance(reference, DBRef) \ else data_relation['resource'] - id_field = config.DOMAIN[subresource]['id_field'] - embedded_doc = app.data.find_one(subresource, None, - **{id_field: reference.id - if isinstance(reference, DBRef) - else reference}) - if embedded_doc: - resolve_media_files(embedded_doc, subresource) - - return embedded_doc + if old_subresource and old_subresource != subresource: + add_query_to_list(query, subresource, subresources_query) + # NOTE: in case it is a DBRef link, the id_field_name is always the _id + # regardless the Eve set-up + id_field_name = "_id" if isinstance(reference, DBRef) \ + else config.DOMAIN[subresource]['id_field'] + id_field_value = reference.id \ + if isinstance(reference, DBRef) else reference + query["$or"].append({id_field_name: id_field_value}) + id_value_to_sort.append(id_field_value) + list_of_id_field_name.append(id_field_name) + if counter == len(references) - 1: + add_query_to_list(query, subresource, subresources_query) + return id_value_to_sort, list_of_id_field_name, subresources_query + + +def add_query_to_list(query, subresource, subresource_query): + subresource_query.update({subresource: copy(query)}) + query.clear() + query["$or"] = [] def subdocuments(fields_chain, resource, document): @@ -788,11 +908,7 @@ def resolve_embedded_documents(document, resource, embedded_fields): for subdocument in subdocuments(fields_chain[:-1], resource, document): if last_field not in subdocument: continue - if isinstance(subdocument[last_field], list): - subdocument[last_field] = list(map(getter, - subdocument[last_field])) - else: - subdocument[last_field] = getter(subdocument[last_field]) + subdocument[last_field] = getter(subdocument[last_field]) def resolve_media_files(document, resource): diff --git a/requirements.txt b/requirements.txt index 7081c0442..8cc2b3fb9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ MarkupSafe==0.23 pymongo==3.4.0 simplejson==3.8.2 Werkzeug==0.11.15 +backport_collections==0.1 diff --git a/setup.py b/setup.py index 830136e90..fe68bcabd 100755 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ 'itsdangerous>=0.24,<1.0', 'flask>=0.10.1,<=0.12', 'pymongo>=3.4', + 'backport_collections>=0.1', ] try: