29 changes: 17 additions & 12 deletions api/download.py
@@ -111,6 +111,7 @@ def _preflight_archivestream(self, req_spec, collection=None):
         filename = None

         used_subpaths = {}
+        ids_of_paths = {}
         base_query = {}
         if not self.superuser_request:
             base_query['permissions._id'] = self.uid
@@ -144,20 +145,20 @@ def _preflight_archivestream(self, req_spec, collection=None):
                     subject_dict[code] = session['subject']

                 for code, subject in subject_dict.iteritems():
-                    subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, project['_id'])
+                    subject_prefix = prefix + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, code)
                     subject_prefixes[code] = subject_prefix
                     total_size, file_cnt = self._append_targets(targets, 'subjects', subject, subject_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

                 for session in session_dict.itervalues():
                     subject_code = session['subject'].get('code', 'unknown_subject')
                     subject = subject_dict[subject_code]
-                    session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, subject_code)
+                    session_prefix = subject_prefixes[subject_code] + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session["_id"])
                     session_prefixes[session['_id']] = session_prefix
                     total_size, file_cnt = self._append_targets(targets, 'sessions', session, session_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

                 for acq in acquisitions:
                     session = session_dict[acq['session']]
-                    acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, session['_id'])
+                    acq_prefix = session_prefixes[session['_id']] + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id'])
                     total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))


@@ -171,7 +172,7 @@ def _preflight_archivestream(self, req_spec, collection=None):
                 subject = session.get('subject', {'code': 'unknown_subject'})
                 if not subject.get('code'):
                     subject['code'] = 'unknown_subject'
-                prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id'])
+                prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id'])
                 total_size, file_cnt = self._append_targets(targets, 'sessions', session, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

# If the param `collection` holding a collection id is not None, filter out acquisitions that are not in the collection
@@ -181,7 +182,7 @@ def _preflight_archivestream(self, req_spec, collection=None):
                 acquisitions = config.db.acquisitions.find(a_query, ['label', 'files', 'uid', 'timestamp', 'timezone'])

                 for acq in acquisitions:
-                    acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, session['_id'])
+                    acq_prefix = prefix + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id'])
                     total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, acq_prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

             elif item['level'] == 'acquisition':
@@ -196,15 +197,15 @@ def _preflight_archivestream(self, req_spec, collection=None):
                     subject['code'] = 'unknown_subject'

                 project = config.db.projects.find_one({'_id': session['project']}, ['group', 'label'])
-                prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, project['_id']) + '/' + self._path_from_container(session, used_subpaths, project['_id']) + '/' + self._path_from_container(acq, used_subpaths, session['_id'])
+                prefix = project['group'] + '/' + project['label'] + '/' + self._path_from_container(subject, used_subpaths, ids_of_paths, subject['code']) + '/' + self._path_from_container(session, used_subpaths, ids_of_paths, session['_id']) + '/' + self._path_from_container(acq, used_subpaths, ids_of_paths, acq['_id'])
                 total_size, file_cnt = self._append_targets(targets, 'acquisitions', acq, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

             elif item['level'] == 'analysis':
                 analysis = config.db.analyses.find_one(base_query, ['parent', 'label', 'files', 'uid', 'timestamp'])
                 if not analysis:
                     # silently skip missing objects/objects user does not have access to
                     continue
-                prefix = self._path_from_container(analysis, used_subpaths, util.sanitize_string_to_filename(analysis['label']))
+                prefix = self._path_from_container(analysis, used_subpaths, ids_of_paths, util.sanitize_string_to_filename(analysis['label']))
                 filename = 'analysis_' + util.sanitize_string_to_filename(analysis['label']) + '.tar'
                 total_size, file_cnt = self._append_targets(targets, 'analyses', analysis, prefix, total_size, file_cnt, req_spec['optional'], data_path, req_spec.get('filters'))

@@ -218,16 +219,20 @@ def _preflight_archivestream(self, req_spec, collection=None):
         else:
             self.abort(404, 'No requested containers could be found')

-    def _path_from_container(self, container, used_subpaths, parent_id):
-        def _find_new_path(path, list_used_subpaths):
+    def _path_from_container(self, container, used_subpaths, ids_of_paths, _id):
+        def _find_new_path(path, list_used_subpaths, ids_of_paths, _id):
             """from the input path finds a path that hasn't been used"""
             path = str(path).replace('/', '_')
-            if path not in list_used_subpaths:
+            if path in list_used_subpaths:
                 return path
+            elif _id == ids_of_paths.get(path, _id):
+                ids_of_paths[path] = _id
+                return path
             i = 0
             while True:
                 modified_path = path + '_' + str(i)
                 if modified_path not in list_used_subpaths:
+                    ids_of_paths[modified_path] = _id
                     return modified_path
                 i += 1
         path = None
@@ -245,8 +250,8 @@ def _find_new_path(path, list_used_subpaths):
             path = container['code']
         if not path:
             path = 'untitled'
-        path = _find_new_path(path, used_subpaths.get(parent_id, []))
-        used_subpaths[parent_id] = used_subpaths.get(parent_id, []) + [path]
+        path = _find_new_path(path, used_subpaths.get(_id, []), ids_of_paths, _id)
+        used_subpaths[_id] = used_subpaths.get(_id, []) + [path]
         return path

     def archivestream(self, ticket):
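The crux of this change: duplicate folder names are now disambiguated per container id (via the new ids_of_paths map) rather than per parent, so two different sessions sharing the label session1 land in session1 and session1_0, while the same container always resolves back to the same folder. A minimal standalone sketch of that bookkeeping (the claim_path wrapper and the sess-A/sess-B ids are illustrative, not part of this diff):

    used_subpaths = {}   # container id -> subpath names already handed out for it
    ids_of_paths = {}    # chosen name -> id of the container that claimed it

    def find_new_path(path, list_used_subpaths, ids_of_paths, _id):
        """Return a folder name for path that is unique per container id."""
        path = str(path).replace('/', '_')
        if path in list_used_subpaths:
            return path                        # this container already owns the name
        elif _id == ids_of_paths.get(path, _id):
            ids_of_paths[path] = _id           # unclaimed, or claimed by this same container
            return path
        i = 0
        while True:                            # taken by another container: add a suffix
            modified_path = path + '_' + str(i)
            if modified_path not in list_used_subpaths:
                ids_of_paths[modified_path] = _id
                return modified_path
            i += 1

    def claim_path(label, _id):
        # Mirrors the tail of _path_from_container: record the chosen name under this id.
        path = find_new_path(label, used_subpaths.get(_id, []), ids_of_paths, _id)
        used_subpaths[_id] = used_subpaths.get(_id, []) + [path]
        return path

    print(claim_path('session1', 'sess-A'))    # session1
    print(claim_path('session1', 'sess-B'))    # session1_0 (same label, different session)
    print(claim_path('session1', 'sess-A'))    # session1   (same session resolves stably)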
52 changes: 49 additions & 3 deletions test/integration_tests/python/test_download.py
@@ -5,9 +5,11 @@


 def test_download(data_builder, file_form, as_admin, api_db):
-    project = data_builder.create_project()
-    session = data_builder.create_session()
-    acquisition = data_builder.create_acquisition()
+    project = data_builder.create_project(label='project1')
+    session = data_builder.create_session(label='session1')
+    session2 = data_builder.create_session(label='session1')
+    acquisition = data_builder.create_acquisition(session=session)
+    acquisition2 = data_builder.create_acquisition(session=session2)

# upload the same file to each container created and use different tags to
# facilitate download filter tests:
@@ -16,6 +18,9 @@ def test_download(data_builder, file_form, as_admin, api_db):
     as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form(
         file_name, meta={'name': file_name, 'type': 'csv'}))

+    as_admin.post('/acquisitions/' + acquisition2 + '/files', files=file_form(
+        file_name, meta={'name': file_name, 'type': 'csv'}))
+
     as_admin.post('/sessions/' + session + '/files', files=file_form(
         file_name, meta={'name': file_name, 'type': 'csv', 'tags': ['plus']}))

@@ -39,6 +44,41 @@ def test_download(data_builder, file_form, as_admin, api_db):
             {'level': 'project', '_id': project},
             {'level': 'session', '_id': session},
             {'level': 'acquisition', '_id': acquisition},
+            {'level': 'acquisition', '_id': acquisition2},
         ]
     })
     assert r.ok
+    ticket = r.json()['ticket']
+
+    # Perform the download
+    r = as_admin.get('/download', params={'ticket': ticket})
+    assert r.ok
+
+    tar_file = cStringIO.StringIO(r.content)
+    tar = tarfile.open(mode="r", fileobj=tar_file)
+
+    # Verify a single file in tar with correct file name
+    found_second_session = False
+    for tarinfo in tar:
+        assert os.path.basename(tarinfo.name) == file_name
+        if 'session1_0' in str(tarinfo.name):
+            found_second_session = True
+    assert found_second_session
+
+    tar.close()
+
+    # Download one session with many acquisitions and make sure they are in the same subject folder
+
+    acquisition3 = data_builder.create_acquisition(session=session)
+    r = as_admin.post('/acquisitions/' + acquisition3 + '/files', files=file_form(
+        file_name, meta={'name': file_name, 'type': 'csv'}))
+    assert r.ok
+
+    r = as_admin.post('/download', json={
+        'optional': False,
+        'nodes': [
+            {'level': 'acquisition', '_id': acquisition},
+            {'level': 'acquisition', '_id': acquisition3},
+        ]
+    })
+    assert r.ok
@@ -52,10 +92,16 @@ def test_download(data_builder, file_form, as_admin, api_db):
     tar = tarfile.open(mode="r", fileobj=tar_file)

     # Verify a single file in tar with correct file name
+    found_second_session = False
     for tarinfo in tar:
         assert os.path.basename(tarinfo.name) == file_name
+        if 'session1_0' in str(tarinfo.name):
+            found_second_session = True
+    assert not found_second_session

     tar.close()

+
     # Try to perform the download from a different IP
     update_result = api_db.downloads.update_one(
         {'_id': ticket},
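The tests assert the deduplication via a substring check on each tar member name. To assert the layout more directly, a helper along these lines could collect the distinct session-level folder names from a download tar (a sketch, not part of this diff; the index 3 assumes the group/project/subject/session/... prefixes built above):

    import cStringIO
    import tarfile

    def session_folders(tar_bytes):
        """Distinct session-level directory names inside a download tar."""
        tar = tarfile.open(mode='r', fileobj=cStringIO.StringIO(tar_bytes))
        try:
            return sorted(set(name.split('/')[3] for name in tar.getnames()
                              if name.count('/') >= 4))
        finally:
            tar.close()

With that, the first download above would be expected to yield something like ['session1', 'session1_0'], and the second, single-session download just ['session1'].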