From 107dd8dac2c222d2ef1af7de694a57722cb31062 Mon Sep 17 00:00:00 2001 From: Megan Henning Date: Thu, 24 Aug 2017 12:51:22 -0500 Subject: [PATCH 1/3] Add info_exists key, fix count bug --- api/handlers/dataexplorerhandler.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index 4d37690f8..b3ed416d2 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -453,7 +453,7 @@ def get_facets(self): return_type, filters, search_string = self._parse_request(request_type='facet') facets_q = copy.deepcopy(FACET_QUERY) - facets_q['query'] = self._construct_query(return_type, search_string, filters)['query'] + facets_q['query'] = self._construct_query(None, search_string, filters)['query'] # if the query comes back with a return_type agg, remove it facets_q['query'].pop('aggs', None) @@ -584,6 +584,13 @@ def _construct_exact_query(self, return_type, search_string, filters, size=100): } } + if return_type == 'file': + query['script_fields'] = { + "info_exists" : { + "script" : "(params['_source'].containsKey('file') && params['_source']['file'].containsKey('info') && !params['_source']['file']['info'].empty)" + } + } + # Add search_string to "match on _all fields" query, otherwise remove unneeded logic if search_string: query['query']['bool']['must']['match']['_all'] = search_string @@ -610,7 +617,7 @@ def _run_query(self, es_query, result_type): def _process_results(self, results, result_type): if result_type in EXACT_CONTAINERS: - return self._process_exact_results(results) + return self._process_exact_results(results, result_type) else: containers = results['aggregations']['by_container']['buckets'] modified_results = [] @@ -618,8 +625,17 @@ def _process_results(self, results, result_type): modified_results.append(c['by_top_hit']['hits']['hits'][0]) return modified_results - def _process_exact_results(self, results): - return results['hits']['hits'] + def _process_exact_results(self, results, result_type): + results = results['hits']['hits'] + if result_type == 'file': + + # Note: At some point this would be better suited + # as an indexed field rather than scripted on the fly + for r in results: + fields = r.pop('fields', {}) + r['_source']['file']['info_exists'] = fields.get('info_exists')[0] + + return results From b363d4139c82227dc9c429901a2c78987a3295cd Mon Sep 17 00:00:00 2001 From: Megan Henning Date: Thu, 24 Aug 2017 13:23:19 -0500 Subject: [PATCH 2/3] Fix pylint error --- api/handlers/dataexplorerhandler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index b3ed416d2..e8a10658c 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -450,7 +450,7 @@ def aggregate_field_values(self): @require_login def get_facets(self): - return_type, filters, search_string = self._parse_request(request_type='facet') + _, filters, search_string = self._parse_request(request_type='facet') facets_q = copy.deepcopy(FACET_QUERY) facets_q['query'] = self._construct_query(None, search_string, filters)['query'] From 5c8447df0d24c03bc065bde9e7d16c454eb98df4 Mon Sep 17 00:00:00 2001 From: Megan Henning Date: Fri, 25 Aug 2017 11:55:06 -0500 Subject: [PATCH 3/3] Fix up unit testing --- api/handlers/dataexplorerhandler.py | 12 +++++++++--- test/unit_tests/python/test_dataexplorer.py | 18 +++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index e8a10658c..737fe3b5b 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -232,6 +232,14 @@ } } +INFO_EXISTS_SCRIPT = { + 'script': """ + (params['_source'].containsKey('file') && + params['_source']['file'].containsKey('info') && + !params['_source']['file']['info'].empty) + """ +} + SOURCE_COMMON = [ "group._id", @@ -586,9 +594,7 @@ def _construct_exact_query(self, return_type, search_string, filters, size=100): if return_type == 'file': query['script_fields'] = { - "info_exists" : { - "script" : "(params['_source'].containsKey('file') && params['_source']['file'].containsKey('info') && !params['_source']['file']['info'].empty)" - } + "info_exists" : INFO_EXISTS_SCRIPT } # Add search_string to "match on _all fields" query, otherwise remove unneeded logic diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py index 96bc3c939..ca9be3c20 100644 --- a/test/unit_tests/python/test_dataexplorer.py +++ b/test/unit_tests/python/test_dataexplorer.py @@ -106,19 +106,24 @@ def test_search(as_public, as_drone, es): # file search cont_type = 'file' - es.search.return_value = {'hits': {'hits': results}} + raw_file_results = [{'fields': {'info_exists': [True]}, '_source': {'file': {}}}] + formatted_file_results = [{'_source': {'file': {'info_exists': True}}}] + es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}} + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) es.search.assert_called_with( body={ '_source': deh.SOURCE[cont_type], 'query': {'bool': {'filter': {'bool': {'must': [{'term': {'container_type': cont_type}}]}}}}, + 'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT}, 'size': 100}, doc_type='flywheel', index='data_explorer') assert r.ok - assert r.json['results'] == results + assert r.json['results'] == formatted_file_results # file search w/ search string and filter + es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}} r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True, 'search_string': search_str, 'filters': [ {'terms': {filter_key: filter_value}}, {'range': filter_range}, @@ -134,13 +139,15 @@ def test_search(as_public, as_drone, es): {'range': filter_range}, ]}} }}, + 'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT}, 'size': 100}, doc_type='flywheel', index='data_explorer') assert r.ok - assert r.json['results'] == results + assert r.json['results'] == formatted_file_results # file search w/ search null filter + es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}} r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True, 'filters': [ {'terms': {filter_key: [filter_value, "null"]}}, ]}) @@ -150,7 +157,7 @@ def test_search(as_public, as_drone, es): 'query': {'bool': { 'filter': {'bool': {'must': [ {'term': {'container_type': cont_type}}, - {'bool': + {'bool': {'should': [ {'bool': @@ -172,11 +179,12 @@ def test_search(as_public, as_drone, es): } ]}} }}, + 'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT}, 'size': 100}, doc_type='flywheel', index='data_explorer') assert r.ok - assert r.json['results'] == results + assert r.json['results'] == formatted_file_results