diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py
index 4d37690f8..737fe3b5b 100644
--- a/api/handlers/dataexplorerhandler.py
+++ b/api/handlers/dataexplorerhandler.py
@@ -232,6 +232,16 @@
     }
 }
 
+# Script field requested for file searches: true when the hit's source
+# has a `file.info` entry that is not empty.
+INFO_EXISTS_SCRIPT = {
+    'script': """
+        (params['_source'].containsKey('file') &&
+        params['_source']['file'].containsKey('info') &&
+        !params['_source']['file']['info'].empty)
+    """
+}
+
 
 SOURCE_COMMON = [
     "group._id",
@@ -450,10 +460,10 @@ def aggregate_field_values(self):
 
     @require_login
     def get_facets(self):
-        return_type, filters, search_string = self._parse_request(request_type='facet')
+        _, filters, search_string = self._parse_request(request_type='facet')
 
         facets_q = copy.deepcopy(FACET_QUERY)
-        facets_q['query'] = self._construct_query(return_type, search_string, filters)['query']
+        facets_q['query'] = self._construct_query(None, search_string, filters)['query']
 
         # if the query comes back with a return_type agg, remove it
         facets_q['query'].pop('aggs', None)
@@ -584,6 +594,11 @@ def _construct_exact_query(self, return_type, search_string, filters, size=100):
             }
         }
 
+        if return_type == 'file':
+            query['script_fields'] = {
+                "info_exists" : INFO_EXISTS_SCRIPT
+            }
+
         # Add search_string to "match on _all fields" query, otherwise remove unneeded logic
         if search_string:
             query['query']['bool']['must']['match']['_all'] = search_string
@@ -610,7 +625,7 @@ def _run_query(self, es_query, result_type):
 
     def _process_results(self, results, result_type):
         if result_type in EXACT_CONTAINERS:
-            return self._process_exact_results(results)
+            return self._process_exact_results(results, result_type)
         else:
             containers = results['aggregations']['by_container']['buckets']
             modified_results = []
@@ -618,8 +633,24 @@ def _process_results(self, results, result_type):
                 modified_results.append(c['by_top_hit']['hits']['hits'][0])
             return modified_results
 
-    def _process_exact_results(self, results):
-        return results['hits']['hits']
+    def _process_exact_results(self, results, result_type):
+        """Return the list of hits, folding `info_exists` into file hits.
+
+        For `file` results the scripted `info_exists` value is moved out of
+        each hit's `fields` and stored under `_source.file`; a hit without
+        that value is reported as False instead of raising.
+        """
+        results = results['hits']['hits']
+        if result_type == 'file':
+
+            # Note: At some point this would be better suited
+            # as an indexed field rather than scripted on the fly
+            for r in results:
+                # `fields` or `info_exists` may be absent; default to False
+                info_exists = r.pop('fields', {}).get('info_exists') or [False]
+                r['_source']['file']['info_exists'] = info_exists[0]
+
+        return results
 
 
 
diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py
index 96bc3c939..ca9be3c20 100644
--- a/test/unit_tests/python/test_dataexplorer.py
+++ b/test/unit_tests/python/test_dataexplorer.py
@@ -106,19 +106,24 @@ def test_search(as_public, as_drone, es):
 
     # file search
     cont_type = 'file'
-    es.search.return_value = {'hits': {'hits': results}}
+    raw_file_results = [{'fields': {'info_exists': [True]}, '_source': {'file': {}}}]
+    formatted_file_results = [{'_source': {'file': {'info_exists': True}}}]
+    es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}}
+
     r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True})
     es.search.assert_called_with(
         body={
             '_source': deh.SOURCE[cont_type],
             'query': {'bool': {'filter': {'bool': {'must': [{'term': {'container_type': cont_type}}]}}}},
+            'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT},
             'size': 100},
         doc_type='flywheel',
         index='data_explorer')
     assert r.ok
-    assert r.json['results'] == results
+    assert r.json['results'] == formatted_file_results
 
     # file search w/ search string and filter
+    es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}}
     r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True, 'search_string': search_str, 'filters': [
         {'terms': {filter_key: filter_value}},
         {'range': filter_range},
@@ -134,13 +139,15 @@ def test_search(as_public, as_drone, es):
                     {'range': filter_range},
                 ]}}
             }},
+            'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT},
             'size': 100},
         doc_type='flywheel',
         index='data_explorer')
     assert r.ok
-    assert r.json['results'] == results
+    assert r.json['results'] == formatted_file_results
 
     # file search w/ search null filter
+    es.search.return_value = {'hits': {'hits': copy.deepcopy(raw_file_results)}}
     r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True, 'filters': [
         {'terms': {filter_key: [filter_value, "null"]}},
     ]})
@@ -150,7 +157,7 @@ def test_search(as_public, as_drone, es):
             'query': {'bool': {
                 'filter': {'bool': {'must': [
                     {'term': {'container_type': cont_type}},
-                    {'bool': 
+                    {'bool':
                         {'should':
                             [
                                 {'bool':
@@ -172,11 +179,12 @@ def test_search(as_public, as_drone, es):
                     }
                 ]}}
             }},
+            'script_fields': {'info_exists': deh.INFO_EXISTS_SCRIPT},
             'size': 100},
         doc_type='flywheel',
         index='data_explorer')
     assert r.ok
-    assert r.json['results'] == results
+    assert r.json['results'] == formatted_file_results
 
 
 