From f6ce28c45339ce7e8b01c79b280f6a35ea916bd0 Mon Sep 17 00:00:00 2001 From: Harsha Kethineni Date: Wed, 30 Aug 2017 16:30:14 -0500 Subject: [PATCH 1/2] added tests for downloading sessions with same label --- api/download.py | 29 +++++++++++-------- .../integration_tests/python/test_download.py | 17 +++++++++-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/api/download.py b/api/download.py index 2794bb7bf..26e947c46 100644 --- a/api/download.py +++ b/api/download.py @@ -111,6 +111,7 @@ def _preflight_archivestream(self, req_spec, collection=None): filename = None used_subpaths = {} + ids_of_paths = {} base_query = {} if not self.superuser_request: base_query['permissions._id'] = self.uid @@ -144,20 +145,20 @@ def _preflight_archivestream(self, req_spec, collection=None): subject_dict[code] = session['subject'] for code, subject in subject_dict.iteritems(): - subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, code) subject_prefixes[code] = subject_prefix total_size, file_cnt = self._append_targets(targets, 'subjects', subject, subject_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) for session in session_dict.itervalues(): subject_code = session['subject'].get('code', 'unknown_subject') subject = subject_dict[subject_code] - session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, subject_code) + session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session["_id"]) session_prefixes[session['_id']] = session_prefix total_size, file_cnt = self._append_targets(targets, 'sessions', session, session_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) for acq in acquisitions: session = session_dict[acq['session']] - 
acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) @@ -171,7 +172,7 @@ def _preflight_archivestream(self, req_spec, collection=None): subject = session.get('subject', {'code': 'unknown_subject'}) if not subject.get('code'): subject['code'] = 'unknown_subject' - prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id']) + prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id']) total_size, file_cnt = self._append_targets(targets, 'sessions', session, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) # If the param `collection` holding a collection id is not None, filter out acquisitions that are not in the collection @@ -181,7 +182,7 @@ def _preflight_archivestream(self, req_spec, collection=None): acquisitions = config.db.acquisitions.find(a_query, ['label', 'files', 'uid', 'timestamp', 'timezone']) for acq in acquisitions: - acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) elif item['level'] == 'acquisition': @@ -196,7 +197,7 @@ def _preflight_archivestream(self, 
req_spec, collection=None): subject['code'] = 'unknown_subject' project = config.db.projects.find_one({'_id': session['project']}, ['group', 'label']) - prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id']) + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id']) + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) elif item['level'] == 'analysis': @@ -204,7 +205,7 @@ def _preflight_archivestream(self, req_spec, collection=None): if not analysis: # silently skip missing objects/objects user does not have access to continue - prefix = self._path_from_container(analysis, used_subpaths, util.sanitize_string_to_filename(analysis['label'])) + prefix = self._path_from_container(analysis, used_subpaths, ids_of_paths, util.sanitize_string_to_filename(analysis['label'])) filename = 'analysis_' + util.sanitize_string_to_filename(analysis['label']) + '.tar' total_size, file_cnt = self._append_targets(targets, 'analyses', analysis, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) @@ -218,16 +219,20 @@ def _preflight_archivestream(self, req_spec, collection=None): else: self.abort(404, 'No requested containers could be found') - def _path_from_container(self, container, used_subpaths, parent_id): - def _find_new_path(path, list_used_subpaths): + def _path_from_container(self, container, used_subpaths, ids_of_paths, _id): + def _find_new_path(path, list_used_subpaths, 
ids_of_paths, _id): """from the input path finds a path that hasn't been used""" path = str(path).replace('/', '_') - if path not in list_used_subpaths: + if path in list_used_subpaths: + return path + elif _id == ids_of_paths.get(path,_id): + ids_of_paths[path] = _id + return path i = 0 while True: modified_path = path + '_' + str(i) if modified_path not in list_used_subpaths: + ids_of_paths[modified_path] = _id + return modified_path i += 1 path = None @@ -245,8 +250,8 @@ def _find_new_path(path, list_used_subpaths): path = container['code'] if not path: path = 'untitled' - path = _find_new_path(path, used_subpaths.get(parent_id, [])) - used_subpaths[parent_id] = used_subpaths.get(parent_id, []) + [path] + path = _find_new_path(path, used_subpaths.get(_id, []), ids_of_paths, _id) + used_subpaths[_id] = used_subpaths.get(_id, []) + [path] return path def archivestream(self, ticket): diff --git a/test/integration_tests/python/test_download.py b/test/integration_tests/python/test_download.py index b2dcc6d50..9e675df37 100644 --- a/test/integration_tests/python/test_download.py +++ b/test/integration_tests/python/test_download.py @@ -5,9 +5,11 @@ def test_download(data_builder, file_form, as_admin, api_db): - project = data_builder.create_project() - session = data_builder.create_session() - acquisition = data_builder.create_acquisition() + project = data_builder.create_project(label='project1') + session = data_builder.create_session(label='session1') + session2 = data_builder.create_session(label='session1') + acquisition = data_builder.create_acquisition(session=session) + acquisition2 = data_builder.create_acquisition(session=session2) # upload the same file to each container created and use different tags to # facilitate download filter tests: @@ -16,6 +18,9 @@ def test_download(data_builder, file_form, as_admin, api_db): as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form( file_name, meta={'name': file_name, 'type': 'csv'})) + 
as_admin.post('/acquisitions/' + acquisition2 + '/files', files=file_form( + file_name, meta={'name': file_name, 'type': 'csv'})) + as_admin.post('/sessions/' + session + '/files', files=file_form( file_name, meta={'name': file_name, 'type': 'csv', 'tags': ['plus']})) @@ -39,6 +44,7 @@ def test_download(data_builder, file_form, as_admin, api_db): {'level': 'project', '_id': project}, {'level': 'session', '_id': session}, {'level': 'acquisition', '_id': acquisition}, + {'level': 'acquisition', '_id':acquisition2}, ] }) assert r.ok @@ -52,8 +58,13 @@ def test_download(data_builder, file_form, as_admin, api_db): tar = tarfile.open(mode="r", fileobj=tar_file) # Verify a single file in tar with correct file name + found_second_session = False for tarinfo in tar: assert os.path.basename(tarinfo.name) == file_name + if 'session1_0' in str(tarinfo.name): + found_second_session = True + assert found_second_session + tar.close() # Try to perform the download from a different IP From 7e06aab63e3c85782201653e4ba59f4c75ef51cc Mon Sep 17 00:00:00 2001 From: Harsha Kethineni Date: Wed, 6 Sep 2017 16:54:42 -0500 Subject: [PATCH 2/2] test for #917 --- .../integration_tests/python/test_download.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/integration_tests/python/test_download.py b/test/integration_tests/python/test_download.py index 9e675df37..3a722cfb2 100644 --- a/test/integration_tests/python/test_download.py +++ b/test/integration_tests/python/test_download.py @@ -67,6 +67,41 @@ def test_download(data_builder, file_form, as_admin, api_db): tar.close() + # Download one session with many acquisitions and make sure they are in the same subject folder + + acquisition3 = data_builder.create_acquisition(session=session) + r = as_admin.post('/acquisitions/' + acquisition3 + '/files', files=file_form( + file_name, meta={'name': file_name, 'type': 'csv'})) + assert r.ok + + r = as_admin.post('/download', json={ + 'optional': False, + 'nodes': [ + 
{'level': 'acquisition', '_id': acquisition}, + {'level': 'acquisition', '_id': acquisition3}, + ] + }) + assert r.ok + ticket = r.json()['ticket'] + + # Perform the download + r = as_admin.get('/download', params={'ticket': ticket}) + assert r.ok + + tar_file = cStringIO.StringIO(r.content) + tar = tarfile.open(mode="r", fileobj=tar_file) + + # Verify a single file in tar with correct file name + found_second_session = False + for tarinfo in tar: + assert os.path.basename(tarinfo.name) == file_name + if 'session1_0' in str(tarinfo.name): + found_second_session = True + assert not found_second_session + + tar.close() + + # Try to perform the download from a different IP update_result = api_db.downloads.update_one( {'_id': ticket},