From f6ce28c45339ce7e8b01c79b280f6a35ea916bd0 Mon Sep 17 00:00:00 2001 From: Harsha Kethineni Date: Wed, 30 Aug 2017 16:30:14 -0500 Subject: [PATCH 1/2] added tests for downloading sessions with same label --- api/download.py | 29 +++++++++++-------- .../integration_tests/python/test_download.py | 17 +++++++++-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/api/download.py b/api/download.py index 2794bb7bf..26e947c46 100644 --- a/api/download.py +++ b/api/download.py @@ -111,6 +111,7 @@ def _preflight_archivestream(self, req_spec, collection=None): filename = None used_subpaths = {} + ids_of_paths = {} base_query = {} if not self.superuser_request: base_query['permissions._id'] = self.uid @@ -144,20 +145,20 @@ def _preflight_archivestream(self, req_spec, collection=None): subject_dict[code] = session['subject'] for code, subject in subject_dict.iteritems(): - subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, code) subject_prefixes[code] = subject_prefix total_size, file_cnt = self._append_targets(targets, 'subjects', subject, subject_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) for session in session_dict.itervalues(): subject_code = session['subject'].get('code', 'unknown_subject') subject = subject_dict[subject_code] - session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, subject_code) + session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session["_id"]) session_prefixes[session['_id']] = session_prefix total_size, file_cnt = self._append_targets(targets, 'sessions', session, session_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) for acq in acquisitions: session = session_dict[acq['session']] - 
acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) @@ -171,7 +172,7 @@ def _preflight_archivestream(self, req_spec, collection=None): subject = session.get('subject', {'code': 'unknown_subject'}) if not subject.get('code'): subject['code'] = 'unknown_subject' - prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id']) + prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id']) total_size, file_cnt = self._append_targets(targets, 'sessions', session, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) # If the param `collection` holding a collection id is not None, filter out acquisitions that are not in the collection @@ -181,7 +182,7 @@ def _preflight_archivestream(self, req_spec, collection=None): acquisitions = config.db.acquisitions.find(a_query, ['label', 'files', 'uid', 'timestamp', 'timezone']) for acq in acquisitions: - acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) elif item['level'] == 'acquisition': @@ -196,7 +197,7 @@ def _preflight_archivestream(self, 
req_spec, collection=None): subject['code'] = 'unknown_subject' project = config.db.projects.find_one({'_id': session['project']}, ['group', 'label']) - prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id']) + '/' + self._path_from_container(acq, used_subpaths, session['_id']) + prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id']) + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id']) total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) elif item['level'] == 'analysis': @@ -204,7 +205,7 @@ def _preflight_archivestream(self, req_spec, collection=None): if not analysis: # silently skip missing objects/objects user does not have access to continue - prefix = self._path_from_container(analysis, used_subpaths, util.sanitize_string_to_filename(analysis['label'])) + prefix = self._path_from_container(analysis, used_subpaths, ids_of_paths, util.sanitize_string_to_filename(analysis['label'])) filename = 'analysis_' + util.sanitize_string_to_filename(analysis['label']) + '.tar' total_size, file_cnt = self._append_targets(targets, 'analyses', analysis, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters')) @@ -218,16 +219,20 @@ def _preflight_archivestream(self, req_spec, collection=None): else: self.abort(404, 'No requested containers could be found') - def _path_from_container(self, container, used_subpaths, parent_id): - def _find_new_path(path, list_used_subpaths): + def _path_from_container(self, container, used_subpaths, ids_of_paths, _id): + def _find_new_path(path, list_used_subpaths, 
ids_of_paths, _id): """from the input path finds a path that hasn't been used""" path = str(path).replace('/', '_') - if path not in list_used_subpaths: + if path in list_used_subpaths: + return path + elif _id == ids_of_paths.get(path,_id): + ids_of_paths[path] = _id + return path i = 0 while True: modified_path = path + '_' + str(i) if modified_path not in list_used_subpaths: + ids_of_paths[modified_path] = _id + return modified_path i += 1 path = None @@ -245,8 +250,8 @@ def _find_new_path(path, list_used_subpaths): path = container['code'] if not path: path = 'untitled' - path = _find_new_path(path, used_subpaths.get(parent_id, [])) - used_subpaths[parent_id] = used_subpaths.get(parent_id, []) + [path] + path = _find_new_path(path, used_subpaths.get(_id, []), ids_of_paths, _id) + used_subpaths[_id] = used_subpaths.get(_id, []) + [path] return path def archivestream(self, ticket): diff --git a/test/integration_tests/python/test_download.py b/test/integration_tests/python/test_download.py index b2dcc6d50..9e675df37 100644 --- a/test/integration_tests/python/test_download.py +++ b/test/integration_tests/python/test_download.py @@ -5,9 +5,11 @@ def test_download(data_builder, file_form, as_admin, api_db): - project = data_builder.create_project() - session = data_builder.create_session() - acquisition = data_builder.create_acquisition() + project = data_builder.create_project(label='project1') + session = data_builder.create_session(label='session1') + session2 = data_builder.create_session(label='session1') + acquisition = data_builder.create_acquisition(session=session) + acquisition2 = data_builder.create_acquisition(session=session2) # upload the same file to each container created and use different tags to # facilitate download filter tests: @@ -16,6 +18,9 @@ def test_download(data_builder, file_form, as_admin, api_db): as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form( file_name, meta={'name': file_name, 'type': 'csv'})) + 
as_admin.post('/acquisitions/' + acquisition2 + '/files', files=file_form( + file_name, meta={'name': file_name, 'type': 'csv'})) + as_admin.post('/sessions/' + session + '/files', files=file_form( file_name, meta={'name': file_name, 'type': 'csv', 'tags': ['plus']})) @@ -39,6 +44,7 @@ def test_download(data_builder, file_form, as_admin, api_db): {'level': 'project', '_id': project}, {'level': 'session', '_id': session}, {'level': 'acquisition', '_id': acquisition}, + {'level': 'acquisition', '_id':acquisition2}, ] }) assert r.ok @@ -52,8 +58,13 @@ def test_download(data_builder, file_form, as_admin, api_db): tar = tarfile.open(mode="r", fileobj=tar_file) # Verify a single file in tar with correct file name + found_second_session = False for tarinfo in tar: assert os.path.basename(tarinfo.name) == file_name + if 'session1_0' in str(tarinfo.name): + found_second_session = True + assert found_second_session + tar.close() # Try to perform the download from a different IP From 7e06aab63e3c85782201653e4ba59f4c75ef51cc Mon Sep 17 00:00:00 2001 From: Harsha Kethineni Date: Wed, 6 Sep 2017 16:54:42 -0500 Subject: [PATCH 2/2] test for #917 --- .../integration_tests/python/test_download.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/integration_tests/python/test_download.py b/test/integration_tests/python/test_download.py index 9e675df37..3a722cfb2 100644 --- a/test/integration_tests/python/test_download.py +++ b/test/integration_tests/python/test_download.py @@ -67,6 +67,41 @@ def test_download(data_builder, file_form, as_admin, api_db): tar.close() + # Download one session with many acquisitions and make sure they are in the same subject folder + + acquisition3 = data_builder.create_acquisition(session=session) + r = as_admin.post('/acquisitions/' + acquisition3 + '/files', files=file_form( + file_name, meta={'name': file_name, 'type': 'csv'})) + assert r.ok + + r = as_admin.post('/download', json={ + 'optional': False, + 'nodes': [ + 
{'level': 'acquisition', '_id': acquisition}, + {'level': 'acquisition', '_id': acquisition3}, + ] + }) + assert r.ok + ticket = r.json()['ticket'] + + # Perform the download + r = as_admin.get('/download', params={'ticket': ticket}) + assert r.ok + + tar_file = cStringIO.StringIO(r.content) + tar = tarfile.open(mode="r", fileobj=tar_file) + + # Verify a single file in tar with correct file name + found_second_session = False + for tarinfo in tar: + assert os.path.basename(tarinfo.name) == file_name + if 'session1_0' in str(tarinfo.name): + found_second_session = True + assert not found_second_session + + tar.close() + + # Try to perform the download from a different IP update_result = api_db.downloads.update_one( {'_id': ticket},