Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def prefix(path, routes):
# General-purpose upload & download

route('/download', Download, h='download', m=['GET', 'POST']),
route('/download/summary', Download, h='summary', m=['POST']),
route('/upload/<strategy:label|uid|uid-match|reaper>', Upload, h='upload', m=['POST']),
route('/clean-packfiles', Upload, h='clean_packfile_tokens', m=['POST']),
route('/engine', Upload, h='engine', m=['POST']),
Expand Down
96 changes: 94 additions & 2 deletions api/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@
from . import validators
import os
from .dao.containerutil import pluralize

log = config.log

BYTES_IN_MEGABYTE = float(1<<20)

def _filter_check(property_filter, property_values):
minus = set(property_filter.get('-', []))
plus = set(property_filter.get('+', []))
if not minus.isdisjoint(property_values):
if "null" in plus and not property_values:
return True
if "null" in minus and property_values:
return False
elif not minus.isdisjoint(property_values):
return False
if plus and plus.isdisjoint(property_values):
return False
Expand Down Expand Up @@ -315,3 +319,91 @@ def download(self):
log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': ')))

return self._preflight_archivestream(req_spec, collection=self.get_param('collection'))

def summary(self):
    """Return a summary of what has been/will be downloaded based on a given query.

    Expects a JSON body that is a list of nodes, each of the form
    ``{"level": <project|session|acquisition|analysis>, "_id": <object id>}``.

    Returns a dict keyed by file type, each value holding the aggregate
    ``count`` and ``mb_total`` of matching files across every requested node.

    Raises:
        400 via abort -- malformed body, bad/missing ``_id``, or unknown level.
        500 via abort -- database aggregation failure (logged internally).
    """
    res = {}
    req = self.request.json_body
    if not isinstance(req, list):
        self.abort(400, "Request body must be a list of nodes")

    # One match filter per collection; every node below appends the ids it
    # covers so a single aggregation per collection handles all nodes.
    cont_query = {
        'projects': {'_id': {'$in': []}},
        'sessions': {'_id': {'$in': []}},
        'acquisitions': {'_id': {'$in': []}},
        'analyses': {'_id': {'$in': []}}
    }

    # Collections touched by at least one node. Accumulated across ALL
    # nodes -- resetting this per node would drop earlier nodes' levels
    # from the aggregation when a request mixes levels.
    targets = set()

    for node in req:
        try:
            node['_id'] = bson.ObjectId(node['_id'])
            level = node['level']
        except (KeyError, TypeError, bson.errors.InvalidId):
            # Bad client input is a 400, not an unhandled 500.
            self.abort(400, "Each node requires a 'level' and a valid '_id'")

        if level == 'project':
            # Expand the project into its sessions and their acquisitions.
            sessions = config.db.sessions.find({'project': node['_id']}, {'_id': 1})
            session_ids = [s['_id'] for s in sessions]
            acquisitions = config.db.acquisitions.find({'session': {'$in': session_ids}}, {'_id': 1})
            acquisition_ids = [a['_id'] for a in acquisitions]

            cont_query['projects']['_id']['$in'].append(node['_id'])
            cont_query['sessions']['_id']['$in'].extend(session_ids)
            cont_query['acquisitions']['_id']['$in'].extend(acquisition_ids)
            targets.update(['projects', 'sessions', 'acquisitions'])

        elif level == 'session':
            # Expand the session into its acquisitions.
            acquisitions = config.db.acquisitions.find({'session': node['_id']}, {'_id': 1})
            acquisition_ids = [a['_id'] for a in acquisitions]

            cont_query['sessions']['_id']['$in'].append(node['_id'])
            cont_query['acquisitions']['_id']['$in'].extend(acquisition_ids)
            targets.update(['sessions', 'acquisitions'])

        elif level == 'acquisition':
            cont_query['acquisitions']['_id']['$in'].append(node['_id'])
            targets.add('acquisitions')

        elif level == 'analysis':
            cont_query['analyses']['_id']['$in'].append(node['_id'])
            targets.add('analyses')

        else:
            self.abort(400, "{} not a recognized level".format(level))

    for cont_name in targets:
        # Count files and total their sizes (in MB), grouped by file type.
        pipeline = [
            {'$match': cont_query[cont_name]},
            {'$unwind': '$files'},
            {'$project': {'_id': '$_id', 'type': '$files.type', 'mbs': {'$divide': ['$files.size', BYTES_IN_MEGABYTE]}}},
            {'$group': {
                '_id': '$type',
                'count': {'$sum': 1},
                'mb_total': {'$sum': '$mbs'}
            }}
        ]

        try:
            result = config.db.command('aggregate', cont_name, pipeline=pipeline)
        except Exception as e:  # pylint: disable=broad-except
            # Log the raw mongo error internally; never return it to the
            # API user.
            log.warning(e)
            self.abort(500, "Failure to load summary")

        if result.get("ok"):
            # Merge per-collection results into one per-type summary.
            for doc in result.get("result"):
                type_ = doc['_id']
                if res.get(type_):
                    res[type_]['count'] += doc.get('count', 0)
                    res[type_]['mb_total'] += doc.get('mb_total', 0)
                else:
                    res[type_] = doc
    return res

66 changes: 66 additions & 0 deletions test/integration_tests/python/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ def test_filters(data_builder, file_form, as_admin):
assert r.json()['file_cnt'] == 2

# Filter by type
as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form(
"test", meta={'name': "test", 'tags': ['red', 'blue']}))
r = as_admin.post('/download', json={
'optional': False,
'filters': [
Expand All @@ -399,3 +401,67 @@ def test_filters(data_builder, file_form, as_admin):
})
assert r.ok
assert r.json()['file_cnt'] == 1
r = as_admin.post('/download', json={
'optional': False,
'filters': [
{'types': {'+':['null']}}
],
'nodes': [
{'level': 'session', '_id': session},
]
})
assert r.ok
assert r.json()['file_cnt'] == 1

def test_summary(data_builder, as_admin, file_form):
    """Exercise /download/summary at every supported node level."""
    project = data_builder.create_project(label='project1')
    session = data_builder.create_session(label='session1')
    session2 = data_builder.create_session(label='session1')
    acquisition = data_builder.create_acquisition(session=session)
    acquisition2 = data_builder.create_acquisition(session=session2)

    # Seed one csv file into each container so each level's summary has a
    # predictable file count; the session/project copies carry tags.
    file_name = 'test.csv'
    seed_uploads = [
        ('/acquisitions/' + acquisition, {'name': file_name, 'type': 'csv'}),
        ('/acquisitions/' + acquisition2, {'name': file_name, 'type': 'csv'}),
        ('/sessions/' + session, {'name': file_name, 'type': 'csv', 'tags': ['plus']}),
        ('/projects/' + project, {'name': file_name, 'type': 'csv', 'tags': ['plus', 'minus']}),
    ]
    for container_url, meta in seed_uploads:
        as_admin.post(container_url + '/files', files=file_form(file_name, meta=meta))

    missing_object_id = '000000000000000000000000'

    # Project level rolls up the project file plus session and acquisition files.
    r = as_admin.post('/download/summary', json=[{"level": "project", "_id": project}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count", 0) == 4

    # Session level covers the session file plus its acquisition's file.
    r = as_admin.post('/download/summary', json=[{"level": "session", "_id": session}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count", 0) == 2

    # Multiple acquisition nodes in one request are combined.
    r = as_admin.post('/download/summary', json=[{"level": "acquisition", "_id": acquisition}, {"level": "acquisition", "_id": acquisition2}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count", 0) == 2

    # An unrecognized level is rejected with a 400.
    r = as_admin.post('/download/summary', json=[{"level": "group", "_id": missing_object_id}])
    assert r.status_code == 400

    # Analysis level: create an analysis with one tabular input and summarize it.
    r = as_admin.post('/sessions/' + session + '/analyses', files=file_form(
        file_name, meta={'label': 'test', 'inputs': [{'name': file_name}]}))
    assert r.ok
    analysis = r.json()['_id']

    r = as_admin.post('/download/summary', json=[{"level": "analysis", "_id": analysis}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("tabular data", {}).get("count", 0) == 1