From 739f226259f2312de15d5d2c96e0d06eefd34444 Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Mon, 10 Jul 2017 17:09:32 +0200 Subject: [PATCH 1/6] add unit tests for POST /api/dataexplorer/search --- test/unit_tests/python/conftest.py | 18 ++- test/unit_tests/python/test_dataexplorer.py | 142 ++++++++++++++++++++ 2 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 test/unit_tests/python/test_dataexplorer.py diff --git a/test/unit_tests/python/conftest.py b/test/unit_tests/python/conftest.py index 1de127f37..05699de65 100644 --- a/test/unit_tests/python/conftest.py +++ b/test/unit_tests/python/conftest.py @@ -42,12 +42,23 @@ def log_db(app): return api.config.log_db +@pytest.fixture(scope='session') +def es(app): + """Return Elasticsearch mock (MagickMock instance)""" + return api.config.es + + @pytest.yield_fixture(scope='session') def app(): - """Return api instance that uses mocked os.environ and MongoClient""" - env_patch = mock.patch.dict( - os.environ, {'SCITRAN_CORE_DRONE_SECRET': SCITRAN_CORE_DRONE_SECRET}, clear=True) + """Return api instance that uses mocked os.environ, ElasticSearch and MongoClient""" + test_env = { + 'SCITRAN_CORE_DRONE_SECRET': SCITRAN_CORE_DRONE_SECRET, + 'TERM': 'xterm', # enable terminal features - useful for pdb sessions + } + env_patch = mock.patch.dict(os.environ, test_env, clear=True) env_patch.start() + es_patch = mock.patch('elasticsearch.Elasticsearch') + es_patch.start() mongo_patch = mock.patch('pymongo.MongoClient', new=mongomock.MongoClient) mongo_patch.start() # NOTE db and log_db is created at import time in api.config @@ -55,6 +66,7 @@ def app(): reload(api.config) yield api.web.start.app_factory() mongo_patch.stop() + es_patch.stop() env_patch.stop() diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py new file mode 100644 index 000000000..75060b773 --- /dev/null +++ b/test/unit_tests/python/test_dataexplorer.py @@ -0,0 +1,142 @@ +def test_search(as_public, as_drone, es): + # try to access search w/o login + r = as_public.post('/dataexplorer/search') + assert r.status_code == 403 + + # try to search w/o body + r = as_drone.post('/dataexplorer/search') + assert r.status_code == 400 + + # try to search w/o return_type in body + r = as_drone.post('/dataexplorer/search', json={}) + assert r.status_code == 400 + + # try to search w/ invalid return_type + r = as_drone.post('/dataexplorer/search', json={'return_type': 'test'}) + assert r.status_code == 400 + + # try to search w/ invalid filters + r = as_drone.post('/dataexplorer/search', json={'return_type': 'file', 'filters': 'test'}) + assert r.status_code == 400 + + # session search against elastic mock + cont_type, filter_key, filter_value, filter_range, search_str, results = 'session', 'key', 'value', 'range', 'search', 'results' + es.search.return_value = {'aggregations': {'by_container': {'buckets': [ + {'by_top_hit': {'hits': {'hits': [results]}}}, + ]}}} + # NOTE maybe sources would read better as module level constants in dataexplorerhandler? + session_sources = [ + 'permissions.*', 'session._id', 'session.label', 'session.created', 'session.timestamp', 'subject.code', + 'project.label', 'group.label', 'group._id', 'project._id', 'session.archived', 'project.archived'] + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'search_string': search_str, 'filters': [ + {'terms': {filter_key: filter_value}}, + {'range': filter_range}, + ]}) + es.search.assert_called_with( + body={ + 'size': 0, + 'query': {'bool': { + 'must': {'match': {'_all': 'search'}}, + 'filter': {'bool': {'must': [ + {'terms': {filter_key + '.raw': filter_value}}, + {'range': filter_range}, + {'term': {'permissions._id': None}} + ]}}, + }}, + 'aggs': {'by_container': {'terms': + {'field': cont_type + '._id', 'size': 100}, + 'aggs': {'by_top_hit': {'top_hits': { + '_source': session_sources, + 'size': 1 + }}} + }} + }, + doc_type='flywheel', + index='data_explorer') + assert r.status_code == 200 + assert r.json['results'] == [results] + + # acquisition search + cont_type = 'acquisition' + acquisition_sources = session_sources + ['acquisition._id', 'acquisition.label', + 'acquisition.created', 'acquisition.timestamp', 'acquisition.archived'] + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) + es.search.assert_called_with( + body={ + 'size': 0, + 'query': {'match_all': {}}, + 'aggs': {'by_container': {'terms': + {'field': cont_type + '._id', 'size': 100}, + 'aggs': {'by_top_hit': {'top_hits': { + '_source': acquisition_sources, + 'size': 1 + }}} + }} + }, + doc_type='flywheel', + index='data_explorer') + assert r.status_code == 200 + assert r.json['results'] == [results] + + # analysis search + cont_type = 'analysis' + analysis_sources = session_sources + ['analysis._id', 'analysis.label', 'analysis.created'] + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) + es.search.assert_called_with( + body={ + 'size': 0, + 'query': {'match_all': {}}, + 'aggs': {'by_container': {'terms': + {'field': cont_type + '._id', 'size': 100}, + 'aggs': {'by_top_hit': {'top_hits': { + '_source': analysis_sources, + 'size': 1 + }}} + }} + }, + doc_type='flywheel', + index='data_explorer') + assert r.status_code == 200 + assert r.json['results'] == [results] + + # file search + cont_type = 'file' + file_sources = [ + 'permissions.*', 'session._id', 'session.label', 'session.created', 'session.timestamp', + 'subject.code', 'project.label', 'group.label', 'acquisition.label', 'acquisition._id', + 'group._id', 'project._id', 'analysis._id', 'analysis.label', 'session.archived', + 'acquisition.archived', 'project.archived', 'file.name', 'file.created', 'file.type', + 'file.measurements', 'file.size', 'parent'] + es.search.return_value = {'hits': {'hits': results}} + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) + es.search.assert_called_with( + body={ + '_source': file_sources, + 'query': {'bool': {'filter': {'bool': {'must': [{'term': {'container_type': cont_type}}]}}}}, + 'size': 100}, + doc_type='flywheel', + index='data_explorer') + assert r.status_code == 200 + assert r.json['results'] == results + + # file search w/ search string and filter + r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True, 'search_string': search_str, 'filters': [ + {'terms': {filter_key: filter_value}}, + {'range': filter_range}, + ]}) + es.search.assert_called_with( + body={ + '_source': file_sources, + 'query': {'bool': { + 'must': {'match': {'_all': search_str}}, + 'filter': {'bool': {'must': [ + {'term': {'container_type': cont_type}}, + {'terms': {filter_key + '.raw': filter_value}}, + {'range': filter_range}, + ]}} + }}, + 'size': 100}, + doc_type='flywheel', + index='data_explorer') + assert r.status_code == 200 + assert r.json['results'] == results From e6d1d008b3625f52860b9ecb83e926e84e23a8a1 Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Tue, 11 Jul 2017 21:59:46 +0200 Subject: [PATCH 2/6] add tests for all dataexplorer handler methods --- test/unit_tests/python/test_dataexplorer.py | 132 +++++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py index 75060b773..738a05ee0 100644 --- a/test/unit_tests/python/test_dataexplorer.py +++ b/test/unit_tests/python/test_dataexplorer.py @@ -1,5 +1,11 @@ +import copy +import json + +import api.handlers.dataexplorerhandler as deh + + def test_search(as_public, as_drone, es): - # try to access search w/o login + # try to search w/o login r = as_public.post('/dataexplorer/search') assert r.status_code == 403 @@ -25,6 +31,9 @@ def test_search(as_public, as_drone, es): {'by_top_hit': {'hits': {'hits': [results]}}}, ]}}} # NOTE maybe sources would read better as module level constants in dataexplorerhandler? + # NOTE try: str(x) will never fail (probably not the desired behavior) at + # * _parse_request#248 + # * search_fields#284 session_sources = [ 'permissions.*', 'session._id', 'session.label', 'session.created', 'session.timestamp', 'subject.code', 'project.label', 'group.label', 'group._id', 'project._id', 'session.archived', 'project.archived'] @@ -140,3 +149,124 @@ def test_search(as_public, as_drone, es): index='data_explorer') assert r.status_code == 200 assert r.json['results'] == results + + +def test_get_facets(as_public, as_drone, es): + # try to get facets w/o login + r = as_public.post('/dataexplorer/facets') + assert r.status_code == 403 + + # get facets w/o sending body + subject_age = 'test' + es.search.return_value = {'aggregations': { + 'session_age': {'subject.age': subject_age}, + 'by_session': {}}} + r = as_drone.post('/dataexplorer/facets') + body = copy.deepcopy(deh.FACET_QUERY) + body.update({'query': {'match_all': {}}}) + es.search.assert_called_with(body=body, doc_type='flywheel', index='data_explorer') + assert r.status_code == 200 + assert r.json == {'facets': {'by_session': {'subject.age': subject_age}}} + + +def test_search_fields(as_public, as_drone, es): + # try to search fields w/o login + r = as_public.post('/dataexplorer/search/fields') + assert r.status_code == 403 + + # search fields + query_field, result_source = 'field', 'source' + es.search.return_value = {'hits': {'hits': [{'_source': result_source}]}} + r = as_drone.post('/dataexplorer/search/fields', json={'field': query_field}) + es.search.assert_called_with( + body={'size': 20, 'query': {'match': {'name': query_field}}}, + doc_type='flywheel_field', + index='data_explorer_fields') + assert r.status_code == 200 + assert r.json == [result_source] + + +def test_index_fields(as_public, as_drone, es): + # try to index fields w/o login + r = as_public.post('/dataexplorer/index/fields') + assert r.status_code == 403 + + # setup es indices mock + indices = set() + def es_indices_exists(index): return index in indices + def es_indices_create(index=None, body=None): indices.add(index) + def es_indices_delete(index): indices.remove(index) + es.indices.exists.side_effect = es_indices_exists + + # try to index fields before data_explorer index is available + r = as_drone.post('/dataexplorer/index/fields') + es.indices.exists.assert_called_with('data_explorer') + assert r.status_code == 404 + indices.add('data_explorer') + + # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ delete) + indices.add('data_explorer_fields') + es.indices.delete.side_effect = Exception('delete') + r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') + es.indices.delete.assert_called_with(index='data_explorer_fields') + assert r.status_code == 500 + es.indices.delete.side_effect = es_indices_delete + + # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ create) + es.indices.create.side_effect = Exception('create') + r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') + es.indices.exists.assert_called_with('data_explorer_fields') + assert es.indices.create.called + assert r.status_code == 500 + es.indices.create.side_effect = es_indices_create + + # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ get_mapping) + es.indices.get_mapping.side_effect = Exception('get_mapping') + r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') + assert r.status_code == 404 + es.indices.get_mapping.side_effect = None + + # (re)index data_explorer_fields w/ hard-reset=true + r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') + es.indices.create.assert_called_with(index='data_explorer_fields', body={ + 'settings': {'number_of_shards': 1, 'number_of_replicas': 0, 'analysis': deh.ANALYSIS}, + 'mappings': {'_default_': {'_all': {'enabled' : True}, 'dynamic_templates': deh.DYNAMIC_TEMPLATES}, 'flywheel': {}}}) + assert r.status_code == 200 + + # index data_explorer_fields - test ignored fields + ignored_fields = ['_all', 'dynamic_templates', 'analysis_reference', 'file_reference', 'parent', 'container_type', 'origin', 'permissions', '_id'] + fields = {field: None for field in ignored_fields} + es.indices.get_mapping.return_value = {'data_explorer': {'mappings': {'flywheel': {'properties': fields}}}} + es.index.reset_mock() + r = as_drone.post('/dataexplorer/index/fields') + assert not es.indices.index.called + assert r.status_code == 200 + + # index data_explorer_fields - test type "flattening" + type_map = { + 'string': ['text', 'keyword'], + 'integer': ['long', 'integer', 'short', 'byte'], + 'float': ['double', 'float'], + 'date': ['date'], + 'boolean': ['boolean'], + 'object': ['object'], + None: ['unrecognized'], + # NOTE _get_field_type returns None for unrecognized field_types + } + type_map_r = {vi: k for k, v in type_map.iteritems() for vi in v} + fields = {k + 'field': {'type': k} for k in type_map_r} + es.indices.get_mapping.return_value = {'data_explorer': {'mappings': {'flywheel': {'properties': fields}}}} + es.index.reset_mock() + r = as_drone.post('/dataexplorer/index/fields') + for field_name in fields: + field_type = type_map_r[field_name.replace('field', '')] + if field_type == 'object': + continue + es.index.assert_any_call( + body=json.dumps({'name': field_name, 'type': field_type}), + doc_type='flywheel_field', + id=field_name, + index='data_explorer_fields') + assert r.status_code == 200 + + # TODO index data_explorer_fields - test recursion From 3cffedad48d159663dcbb2af757ffa0284a02168 Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Wed, 12 Jul 2017 16:10:39 +0200 Subject: [PATCH 3/6] rebase and fix dataexplorer tests for follow-up changes --- api/handlers/dataexplorerhandler.py | 65 ++++++++++++++++----- test/unit_tests/python/test_dataexplorer.py | 53 +++++++++-------- 2 files changed, 75 insertions(+), 43 deletions(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index 3ef026c65..414336388 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -228,6 +228,52 @@ } +SOURCE_COMMON = [ + "group._id", + "group.label", + "permissions.*", + "project._id", + "project.archived", + "project.label", + "session._id", + "session.archived", + "session.created", + "session.label", + "session.timestamp", + "subject.code", +] + +SOURCE = { + "file": SOURCE_COMMON + [ + "acquisition._id", + "acquisition.archived", + "acquisition.label", + "analysis._id", + "analysis.label", + "file.created", + "file.measurements", + "file.name", + "file.size", + "file.type", + ], + "session": SOURCE_COMMON, + "acquisition": SOURCE_COMMON + [ + "acquisition._id", + "acquisition.archived", + "acquisition.created", + "acquisition.label", + "acquisition.timestamp", + ], + "analysis": SOURCE_COMMON + [ + "analysis._id", + "analysis.created", + "analysis.label", + "analysis.parent", + "analysis.user", + ], +} + + class DataExplorerHandler(base.RequestHandler): # pylint: disable=broad-except @@ -373,6 +419,7 @@ def search_fields(self): try: field_query = str(field_query) except ValueError: + # NOTE unreachable self.abort(400, 'Must specify string for field query') es_query = { @@ -415,15 +462,6 @@ def _construct_query(self, return_type, search_string, filters, size=100): if return_type == 'file': return self._construct_file_query(search_string, filters, size) - source = [ "permissions.*", "session._id", "session.label", "session.created", "session.timestamp", - "subject.code", "project.label", "group.label", "group._id", "project._id", "session.archived", "project.archived" ] - - if return_type == 'acquisition': - source.extend(["acquisition._id", "acquisition.label", "acquisition.created", "acquisition.timestamp", "acquisition.archived"]) - - if return_type == 'analysis': - source.extend(["analysis._id", "analysis.label", "analysis.created", "analysis.parent", "analysis.user"]) - query = { "size": 0, "query": { @@ -452,7 +490,7 @@ def _construct_query(self, return_type, search_string, filters, size=100): "aggs": { "by_top_hit": { "top_hits": { - "_source": source, + "_source": SOURCE[return_type], "size": 1 } } @@ -475,14 +513,9 @@ def _construct_query(self, return_type, search_string, filters, size=100): return query def _construct_file_query(self, search_string, filters, size=100): - source = [ "permissions.*", "session._id", "session.label", "session.created", - "session.timestamp", "subject.code", "project.label", "group.label", "acquisition.label", - "acquisition._id", "group._id", "project._id", "analysis._id", "analysis.label", - "session.archived", "acquisition.archived", "project.archived" ] - source.extend(["file.name", "file.created", "file.type", "file.measurements", "file.size", "parent"]) query = { "size": size, - "_source": source, + "_source": SOURCE['file'], "query": { "bool": { "must": { diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py index 738a05ee0..a22576935 100644 --- a/test/unit_tests/python/test_dataexplorer.py +++ b/test/unit_tests/python/test_dataexplorer.py @@ -1,6 +1,8 @@ import copy import json +import elasticsearch + import api.handlers.dataexplorerhandler as deh @@ -30,13 +32,6 @@ def test_search(as_public, as_drone, es): es.search.return_value = {'aggregations': {'by_container': {'buckets': [ {'by_top_hit': {'hits': {'hits': [results]}}}, ]}}} - # NOTE maybe sources would read better as module level constants in dataexplorerhandler? - # NOTE try: str(x) will never fail (probably not the desired behavior) at - # * _parse_request#248 - # * search_fields#284 - session_sources = [ - 'permissions.*', 'session._id', 'session.label', 'session.created', 'session.timestamp', 'subject.code', - 'project.label', 'group.label', 'group._id', 'project._id', 'session.archived', 'project.archived'] r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'search_string': search_str, 'filters': [ {'terms': {filter_key: filter_value}}, {'range': filter_range}, @@ -55,7 +50,7 @@ def test_search(as_public, as_drone, es): 'aggs': {'by_container': {'terms': {'field': cont_type + '._id', 'size': 100}, 'aggs': {'by_top_hit': {'top_hits': { - '_source': session_sources, + '_source': deh.SOURCE[cont_type], 'size': 1 }}} }} @@ -67,8 +62,6 @@ def test_search(as_public, as_drone, es): # acquisition search cont_type = 'acquisition' - acquisition_sources = session_sources + ['acquisition._id', 'acquisition.label', - 'acquisition.created', 'acquisition.timestamp', 'acquisition.archived'] r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) es.search.assert_called_with( body={ @@ -77,7 +70,7 @@ def test_search(as_public, as_drone, es): 'aggs': {'by_container': {'terms': {'field': cont_type + '._id', 'size': 100}, 'aggs': {'by_top_hit': {'top_hits': { - '_source': acquisition_sources, + '_source': deh.SOURCE[cont_type], 'size': 1 }}} }} @@ -89,7 +82,6 @@ def test_search(as_public, as_drone, es): # analysis search cont_type = 'analysis' - analysis_sources = session_sources + ['analysis._id', 'analysis.label', 'analysis.created'] r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) es.search.assert_called_with( body={ @@ -98,7 +90,7 @@ def test_search(as_public, as_drone, es): 'aggs': {'by_container': {'terms': {'field': cont_type + '._id', 'size': 100}, 'aggs': {'by_top_hit': {'top_hits': { - '_source': analysis_sources, + '_source': deh.SOURCE[cont_type], 'size': 1 }}} }} @@ -110,17 +102,11 @@ def test_search(as_public, as_drone, es): # file search cont_type = 'file' - file_sources = [ - 'permissions.*', 'session._id', 'session.label', 'session.created', 'session.timestamp', - 'subject.code', 'project.label', 'group.label', 'acquisition.label', 'acquisition._id', - 'group._id', 'project._id', 'analysis._id', 'analysis.label', 'session.archived', - 'acquisition.archived', 'project.archived', 'file.name', 'file.created', 'file.type', - 'file.measurements', 'file.size', 'parent'] es.search.return_value = {'hits': {'hits': results}} r = as_drone.post('/dataexplorer/search', json={'return_type': cont_type, 'all_data': True}) es.search.assert_called_with( body={ - '_source': file_sources, + '_source': deh.SOURCE[cont_type], 'query': {'bool': {'filter': {'bool': {'must': [{'term': {'container_type': cont_type}}]}}}}, 'size': 100}, doc_type='flywheel', @@ -135,7 +121,7 @@ def test_search(as_public, as_drone, es): ]}) es.search.assert_called_with( body={ - '_source': file_sources, + '_source': deh.SOURCE[cont_type], 'query': {'bool': { 'must': {'match': {'_all': search_str}}, 'filter': {'bool': {'must': [ @@ -179,7 +165,7 @@ def test_search_fields(as_public, as_drone, es): es.search.return_value = {'hits': {'hits': [{'_source': result_source}]}} r = as_drone.post('/dataexplorer/search/fields', json={'field': query_field}) es.search.assert_called_with( - body={'size': 20, 'query': {'match': {'name': query_field}}}, + body={'size': 15, 'query': {'match': {'name': query_field}}}, doc_type='flywheel_field', index='data_explorer_fields') assert r.status_code == 200 @@ -191,11 +177,16 @@ def test_index_fields(as_public, as_drone, es): r = as_public.post('/dataexplorer/index/fields') assert r.status_code == 403 - # setup es indices mock + # setup functions for later use in es.indices.exists mock indices = set() def es_indices_exists(index): return index in indices def es_indices_create(index=None, body=None): indices.add(index) def es_indices_delete(index): indices.remove(index) + + # try to index fields w/ es unavailable (exc @ exists) + es.indices.exists.side_effect = elasticsearch.TransportError('test', 'test', 'test') + r = as_drone.post('/dataexplorer/index/fields') + assert r.status_code == 404 es.indices.exists.side_effect = es_indices_exists # try to index fields before data_explorer index is available @@ -206,14 +197,14 @@ def es_indices_delete(index): indices.remove(index) # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ delete) indices.add('data_explorer_fields') - es.indices.delete.side_effect = Exception('delete') + es.indices.delete.side_effect = elasticsearch.ElasticsearchException r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') es.indices.delete.assert_called_with(index='data_explorer_fields') assert r.status_code == 500 es.indices.delete.side_effect = es_indices_delete # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ create) - es.indices.create.side_effect = Exception('create') + es.indices.create.side_effect = elasticsearch.ElasticsearchException r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') es.indices.exists.assert_called_with('data_explorer_fields') assert es.indices.create.called @@ -221,7 +212,7 @@ def es_indices_delete(index): indices.remove(index) es.indices.create.side_effect = es_indices_create # try to (re)index data_explorer_fields w/ hard-reset=true (exc @ get_mapping) - es.indices.get_mapping.side_effect = Exception('get_mapping') + es.indices.get_mapping.side_effect = KeyError r = as_drone.post('/dataexplorer/index/fields?hard-reset=true') assert r.status_code == 404 es.indices.get_mapping.side_effect = None @@ -256,14 +247,22 @@ def es_indices_delete(index): indices.remove(index) type_map_r = {vi: k for k, v in type_map.iteritems() for vi in v} fields = {k + 'field': {'type': k} for k in type_map_r} es.indices.get_mapping.return_value = {'data_explorer': {'mappings': {'flywheel': {'properties': fields}}}} + es.search.return_value = {'aggregations': {'results': { + 'sum_other_doc_count': 0, + 'buckets': [{'doc_count': 0}]}}} es.index.reset_mock() r = as_drone.post('/dataexplorer/index/fields') for field_name in fields: field_type = type_map_r[field_name.replace('field', '')] if field_type == 'object': continue + if field_type == 'string': + es.search.assert_any_call( + body={'aggs': {'results': {'terms': {'field': field_name + '.raw', 'size': 15}}}, 'size': 0}, + doc_type='flywheel', + index='data_explorer') es.index.assert_any_call( - body=json.dumps({'name': field_name, 'type': field_type}), + body=json.dumps({'name': field_name, 'type': field_type, 'facet': False}), doc_type='flywheel_field', id=field_name, index='data_explorer_fields') From 00a1b03db16041d4190f3b1877829a9c27a14c0e Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Wed, 12 Jul 2017 16:55:31 +0200 Subject: [PATCH 4/6] add tests for /dataexplorer/search/fields/aggregate (typeahead) --- api/handlers/dataexplorerhandler.py | 1 + test/unit_tests/python/test_dataexplorer.py | 90 ++++++++++++++++++--- 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index 414336388..afa268fce 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -354,6 +354,7 @@ def aggregate_field_values(self): } } if not filters: + # NOTE unreachable body['query']['bool'].pop('filter') if search_string is None: body['query']['bool']['must'] = MATCH_ALL diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py index a22576935..0f465479d 100644 --- a/test/unit_tests/python/test_dataexplorer.py +++ b/test/unit_tests/python/test_dataexplorer.py @@ -57,7 +57,7 @@ def test_search(as_public, as_drone, es): }, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json['results'] == [results] # acquisition search @@ -77,7 +77,7 @@ def test_search(as_public, as_drone, es): }, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json['results'] == [results] # analysis search @@ -97,7 +97,7 @@ def test_search(as_public, as_drone, es): }, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json['results'] == [results] # file search @@ -111,7 +111,7 @@ def test_search(as_public, as_drone, es): 'size': 100}, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json['results'] == results # file search w/ search string and filter @@ -133,7 +133,7 @@ def test_search(as_public, as_drone, es): 'size': 100}, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json['results'] == results @@ -151,7 +151,7 @@ def test_get_facets(as_public, as_drone, es): body = copy.deepcopy(deh.FACET_QUERY) body.update({'query': {'match_all': {}}}) es.search.assert_called_with(body=body, doc_type='flywheel', index='data_explorer') - assert r.status_code == 200 + assert r.ok assert r.json == {'facets': {'by_session': {'subject.age': subject_age}}} @@ -168,7 +168,7 @@ def test_search_fields(as_public, as_drone, es): body={'size': 15, 'query': {'match': {'name': query_field}}}, doc_type='flywheel_field', index='data_explorer_fields') - assert r.status_code == 200 + assert r.ok assert r.json == [result_source] @@ -222,7 +222,7 @@ def es_indices_delete(index): indices.remove(index) es.indices.create.assert_called_with(index='data_explorer_fields', body={ 'settings': {'number_of_shards': 1, 'number_of_replicas': 0, 'analysis': deh.ANALYSIS}, 'mappings': {'_default_': {'_all': {'enabled' : True}, 'dynamic_templates': deh.DYNAMIC_TEMPLATES}, 'flywheel': {}}}) - assert r.status_code == 200 + assert r.ok # index data_explorer_fields - test ignored fields ignored_fields = ['_all', 'dynamic_templates', 'analysis_reference', 'file_reference', 'parent', 'container_type', 'origin', 'permissions', '_id'] @@ -231,7 +231,7 @@ def es_indices_delete(index): indices.remove(index) es.index.reset_mock() r = as_drone.post('/dataexplorer/index/fields') assert not es.indices.index.called - assert r.status_code == 200 + assert r.ok # index data_explorer_fields - test type "flattening" type_map = { @@ -266,6 +266,76 @@ def es_indices_delete(index): indices.remove(index) doc_type='flywheel_field', id=field_name, index='data_explorer_fields') - assert r.status_code == 200 + assert r.ok # TODO index data_explorer_fields - test recursion + # TODO index data_explorer_fields - test facet=True + + +def test_aggregate_field_values(as_public, as_drone, es): + # try to get typeadhed w/o login + r = as_public.post('/dataexplorer/search/fields/aggregate') + assert r.status_code == 403 + + # try to get typeadhed w/o body + r = as_drone.post('/dataexplorer/search/fields/aggregate') + assert r.status_code == 400 + + # try to get typeadhed for non-existent field + field_name, search_str, result = 'field', 'search', 'result' + es.get.side_effect = elasticsearch.TransportError('test', 'test', 'test') + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name}) + assert r.status_code == 404 + es.get.side_effect = None + + # try to get typeadhed for a field type that's not allowed + es.get.return_value = {'_source': {'type': 'test'}} + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name}) + assert r.status_code == 400 + + # get typeahead w/o search string for string|boolean field type + es.get.return_value = {'_source': {'type': 'string'}} + es.search.return_value = {'aggregations': {'results': result}} + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name}) + es.search.assert_called_with( + body={'aggs': {'results': {'terms': {'field': field_name + '.raw', 'size': 15}}}, + 'query': {'bool': {'filter': [{'term': {'permissions._id': None}}], 'must': {'match_all': {}}}}, + 'size': 0}, + doc_type='flywheel', + index='data_explorer') + assert r.ok + assert r.json == result + + # get typeahead w/ search string for string|boolean field type + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name, 'search_string': search_str}) + es.search.assert_called_with( + body={'aggs': {'results': {'terms': {'field': field_name + '.raw', 'size': 15}}}, + 'query': {'bool': {'filter': [{'term': {'permissions._id': None}}], 'must': {'match': {'field': search_str}}}}, + 'size': 0}, + doc_type='flywheel', + index='data_explorer') + assert r.ok + assert r.json == result + + # get typeahead w/o search string for integer|float|date field type + es.get.return_value = {'_source': {'type': 'integer'}} + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name}) + es.search.assert_called_with( + body={'aggs': {'results': {'stats': {'field': field_name}}}, + 'query': {'bool': {'filter': [{'term': {'permissions._id': None}}], 'must': {'match_all': {}}}}, + 'size': 0}, + doc_type='flywheel', + index='data_explorer') + assert r.ok + assert r.json == result + + # get typeahead w/ search string for integer|float|date field type + r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name, 'search_string': search_str}) + es.search.assert_called_with( + body={'aggs': {'results': {'stats': {'field': field_name}}}, + 'query': {'bool': {'filter': [{'term': {'permissions._id': None}}], 'must': {'match': {'field': search_str}}}}, + 'size': 0}, + doc_type='flywheel', + index='data_explorer') + assert r.ok + assert r.json == result From 6b7d6819110102f679f0f77188173f19909a3149 Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Wed, 12 Jul 2017 17:26:09 +0200 Subject: [PATCH 5/6] subclass TransportError in test to fix ci failure --- test/unit_tests/python/test_dataexplorer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/unit_tests/python/test_dataexplorer.py b/test/unit_tests/python/test_dataexplorer.py index 0f465479d..363e55715 100644 --- a/test/unit_tests/python/test_dataexplorer.py +++ b/test/unit_tests/python/test_dataexplorer.py @@ -6,6 +6,10 @@ import api.handlers.dataexplorerhandler as deh +class TestTransportError(elasticsearch.TransportError): + def __str__(self): + return 'TestTransportError' + def test_search(as_public, as_drone, es): # try to search w/o login r = as_public.post('/dataexplorer/search') @@ -184,7 +188,7 @@ def es_indices_create(index=None, body=None): indices.add(index) def es_indices_delete(index): indices.remove(index) # try to index fields w/ es unavailable (exc @ exists) - es.indices.exists.side_effect = elasticsearch.TransportError('test', 'test', 'test') + es.indices.exists.side_effect = TestTransportError r = as_drone.post('/dataexplorer/index/fields') assert r.status_code == 404 es.indices.exists.side_effect = es_indices_exists @@ -283,7 +287,7 @@ def test_aggregate_field_values(as_public, as_drone, es): # try to get typeadhed for non-existent field field_name, search_str, result = 'field', 'search', 'result' - es.get.side_effect = elasticsearch.TransportError('test', 'test', 'test') + es.get.side_effect = TestTransportError r = as_drone.post('/dataexplorer/search/fields/aggregate', json={'field_name': field_name}) assert r.status_code == 404 es.get.side_effect = None From bd67b59db9a79edb7e35a020a22c77055b57e4ef Mon Sep 17 00:00:00 2001 From: Ambrus Simon Date: Thu, 20 Jul 2017 21:11:02 +0200 Subject: [PATCH 6/6] remove non-reachable try / add todo --- api/handlers/dataexplorerhandler.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/api/handlers/dataexplorerhandler.py b/api/handlers/dataexplorerhandler.py index afa268fce..39b6b56f3 100644 --- a/api/handlers/dataexplorerhandler.py +++ b/api/handlers/dataexplorerhandler.py @@ -354,7 +354,7 @@ def aggregate_field_values(self): } } if not filters: - # NOTE unreachable + # TODO add non-user auth support (#865) body['query']['bool'].pop('filter') if search_string is None: body['query']['bool']['must'] = MATCH_ALL @@ -417,12 +417,6 @@ def get_facets(self): def search_fields(self): field_query = self.request.json_body.get('field') - try: - field_query = str(field_query) - except ValueError: - # NOTE unreachable - self.abort(400, 'Must specify string for field query') - es_query = { "size": 15, "query": {