From 6a4965922c484697e5509200f570550df3799195 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Sun, 15 May 2016 21:38:40 -0500
Subject: [PATCH 01/11] Use placer for engine upload

---
 api/dao/hierarchy.py | 87 ++++++++++++++++++++++----------------------
 api/placer.py        |  6 +--
 2 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/api/dao/hierarchy.py b/api/dao/hierarchy.py
index 10dda3d07..a6d18ffc6 100644
--- a/api/dao/hierarchy.py
+++ b/api/dao/hierarchy.py
@@ -334,53 +334,54 @@ def dict_fileinfos(infos):
     return dict_infos
 
-def update_container_hierarchy(metadata, acquisition_id, level):
-    project = metadata.get('project')
-    session = metadata.get('session')
-    acquisition = metadata.get('acquisition')
+def update_container_hierarchy(metadata, cid, container_type):
+    c_metadata = metadata.get(container_type)
     now = datetime.datetime.utcnow()
-    if acquisition.get('timestamp'):
-        acquisition['timestamp'] = dateutil.parser.parse(acquisition['timestamp'])
-    acquisition['modified'] = now
-    acquisition_obj = _update_container({'_id': acquisition_id}, acquisition, 'acquisitions')
-    if acquisition_obj is None:
-        raise APIStorageException('acquisition doesn''t exist')
-    if acquisition.get('timestamp'):
-        session_obj = config.db.sessions.find_one_and_update(
-            {'_id': acquisition_obj['session']},
-            {
-                '$min': dict(timestamp=acquisition['timestamp']),
-                '$set': dict(timezone=acquisition.get('timezone'))
-            },
-            return_document=pymongo.collection.ReturnDocument.AFTER
-        )
-        config.db.projects.find_one_and_update(
-            {'_id': session_obj['project']},
-            {
-                '$max': dict(timestamp=acquisition['timestamp']),
-                '$set': dict(timezone=acquisition.get('timezone'))
-            }
-        )
-    session_obj = None
-    if session:
-        session['modified'] = now
-        session_obj = _update_container({'_id': acquisition_obj['session']}, session, 'sessions')
-    if project:
-        project['modified'] = now
-        if not session_obj:
-            session_obj = config.db.sessions.find_one({'_id': acquisition_obj['session']})
-        _update_container({'_id': session_obj['project']}, project, 'projects')
-    return acquisition_obj
-
-def _update_container(query, update, cont_name):
-    return config.db[cont_name].find_one_and_update(
-        query,
-        {
-            '$set': util.mongo_dict(update)
-        },
+    if c_metadata.get('timestamp'):
+        c_metadata['timestamp'] = dateutil.parser.parse(c_metadata['timestamp'])
+    c_metadata['modified'] = now
+    c_obj = _update_container({'_id': cid}, {}, c_metadata, container_type)
+    if c_obj is None:
+        raise APIStorageException('container doesn''t exist')
+    if container_type in ['session', 'acquisition']:
+        update_timestamp = True if c_metadata.get('timestamp') else False
+        _update_hierarchy(c_obj, container_type, metadata, update_timestamp)
+    return c_obj
+
+
+def _update_container(query, update, set_update, cont_name):
+    update['$set'] = util.mongo_dict(set_update)
+    return config.db[cont_name].find_one_and_update(query,update,
         return_document=pymongo.collection.ReturnDocument.AFTER
     )
+
+
+def _update_hierarchy(container, container_type, metadata, update_timestamp=False):
+    project_id = container['project'] # for sessions
+
+    if container_type == 'acquisition':
+        update = {}
+        session = metadata.get('session', {})
+        if update_timestamp:
+            update['$min'] = dict(timestamp=container['timestamp'])
+            session['timezone'] = dict(timezone=container.get('timezone'))
+        if session.keys():
+            session['modified'] = now
+            session_obj = _update_container({'_id': container['session']}, update, session, 'sessions')
+        project_id = session_obj['project']
+
+    if project_id is None:
+        raise APIStorageException('Failed to find project id in session obj')
+    update = {}
+    project = metadata.get('project', {})
+    if update_timestamp:
+        update['$max'] = dict(timestamp=container['timestamp'])
+        project['timezone'] = dict(timezone=container.get('timezone'))
+    if project.keys():
+        project['modified'] = now
+        project_obj = _update_container({'_id': project_id}, update, project, 'projects')
+
+
 def merge_fileinfos(parsed_files, infos):
     """it takes a dictionary of "hard_infos" (file size, hash)
     merging them with infos derived from a list of infos on the same or on other files
diff --git a/api/placer.py b/api/placer.py
index 0d216e12c..7fad26cd7 100644
--- a/api/placer.py
+++ b/api/placer.py
@@ -193,14 +193,12 @@ def check(self):
         validators.validate_data(self.metadata, 'enginemetadata.json', 'input', 'POST', optional=True)
         self.saved = []
 
-    # Could avoid loops in process_file_field by setting up the
-
     def process_file_field(self, field, info):
         if self.metadata is not None:
             # OPPORTUNITY: hard-coded container levels will need to go away soon
             # Engine shouldn't know about container names; maybe parent contexts?
             # How will this play with upload unification? Unify schemas as well?
-            file_mds = self.metadata.get('acquisition', {}).get('files', [])
+            file_mds = self.metadata.get(container_type, {}).get('files', [])
 
             for file_md in file_mds:
                 if file_md['name'] == info['name']:
@@ -218,7 +216,7 @@ def finalize(self):
         # Updating various properties of the hierarchy; currently assumes acquisitions; might need fixing for other levels.
         # NOTE: only called in EnginePlacer
         bid = bson.ObjectId(self.id)
-        self.obj = hierarchy.update_container_hierarchy(self.metadata, bid, '')
+        self.obj = hierarchy.update_container_hierarchy(self.metadata, bid, self.container_type)
 
         return self.saved
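Note on the patch above: every container update is now funneled through _update_container, which turns nested metadata into a dotted-key $set document via util.mongo_dict. A minimal sketch of the flattening this relies on — an assumption about util.mongo_dict's behavior, not the actual implementation in api/util.py:

    # Sketch only: assumed behavior of util.mongo_dict, for illustration.
    def mongo_dict(d, prefix=''):
        # {'label': 'scan', 'subject': {'code': 's1'}}
        #   -> {'label': 'scan', 'subject.code': 's1'}
        flat = {}
        for key, value in d.items():
            dotted = prefix + key
            if isinstance(value, dict):
                flat.update(mongo_dict(value, dotted + '.'))
            else:
                flat[dotted] = value
        return flat

    update = {'$set': mongo_dict({'label': 'scan', 'subject': {'code': 's1'}})}
    # update == {'$set': {'label': 'scan', 'subject.code': 's1'}}

The flattening matters because a plain $set of {'subject': {...}} would replace the whole subject subdocument instead of updating individual fields.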
From c551769d77ead66e852589c2d6fc26063a736a1a Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Mon, 23 May 2016 17:55:56 -0500
Subject: [PATCH 02/11] Add start to engine placer tests

---
 test/integration_tests/test_uploads.py | 52 ++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index 048e715a3..ca41671f5 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -42,6 +42,33 @@ def teardown_db():
     fixture_data.files = files
     return fixture_data
 
+@pytest.fixture()
+def with_hierarchyand_file_data(api_as_admin, bunch, request, data_builder):
+    group = data_builder.create_group('test_upload_' + str(int(time.time() * 1000)))
+    project = data_builder.create_project(group)
+    session = data_builder.create_session(project)
+    acquisition = data_builder.create_acquisition(session)
+
+    file_names = ['one.csv', 'two.csv']
+    files = {}
+    for i, name in enumerate(file_names):
+        files['file' + str(i+1)] = (name, 'some,data,to,send\nanother,row,to,send\n')
+
+    def teardown_db():
+        data_builder.delete_acquisition(acquisition)
+        data_builder.delete_session(session)
+        data_builder.delete_project(project)
+        data_builder.delete_group(group)
+
+    request.addfinalizer(teardown_db)
+
+    fixture_data = bunch.create()
+    fixture_data.group = group
+    fixture_data.project = project
+    fixture_data.session = session
+    fixture_data.acquisition = acquisition
+    return fixture_data
+
 def test_uid_upload(with_group_and_file_data, api_as_admin):
 
     data = with_group_and_file_data
@@ -124,4 +151,29 @@ def test_label_upload(with_group_and_file_data, api_as_admin):
 
     r = api_as_admin.post('/upload/label', files=data.files)
     assert r.status_code == 400
+
+def test_acquisition_engine_upload(with_hierarchyand_file_data, api_as_admin):
+
+    data = with_group_and_file_data
+    metadata = {
+        'session':{
+            'label':'test_session',
+            'files':[
+                {
+                    'name':data.files.keys()[1]
+                }
+            ],
+            'subject': {'code': 'test_subject'}
+        },
+        'acquisition':{
+            'label':'test_acquisition',
+            'files':[
+                {
+                    'name':data.files.keys()[2]
+                }
+            ]
+        }
+    }
+    metadata = json.dumps(metadata)
+
+    r = api_as_admin.post('/upload/label', files=data.files)
+    assert r.ok
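For reference, the files mapping the new fixture builds follows the requests-style multipart convention of field name -> (filename, content); written out literally it is:

    files = {
        'file1': ('one.csv', 'some,data,to,send\nanother,row,to,send\n'),
        'file2': ('two.csv', 'some,data,to,send\nanother,row,to,send\n'),
    }

Each entry becomes one multipart form part in the POST, which is what the placer iterates over as file fields.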
From 44b3f5d79f4b64e82406f230b1e0ef4f4dcf86f8 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Wed, 1 Jun 2016 15:11:12 -0500
Subject: [PATCH 03/11] Use base upload for engine upload

---
 api/dao/hierarchy.py |  1 +
 api/placer.py        |  2 +-
 api/upload.py        | 83 ++++++--------------------------------------
 3 files changed, 12 insertions(+), 74 deletions(-)

diff --git a/api/dao/hierarchy.py b/api/dao/hierarchy.py
index a6d18ffc6..24879635f 100644
--- a/api/dao/hierarchy.py
+++ b/api/dao/hierarchy.py
@@ -358,6 +358,7 @@ def _update_container(query, update, set_update, cont_name):
 
 def _update_hierarchy(container, container_type, metadata, update_timestamp=False):
     project_id = container['project'] # for sessions
+    now = datetime.datetime.utcnow()
 
     if container_type == 'acquisition':
         update = {}
diff --git a/api/placer.py b/api/placer.py
index 7fad26cd7..298560d2d 100644
--- a/api/placer.py
+++ b/api/placer.py
@@ -198,7 +198,7 @@ def process_file_field(self, field, info):
             # OPPORTUNITY: hard-coded container levels will need to go away soon
             # Engine shouldn't know about container names; maybe parent contexts?
             # How will this play with upload unification? Unify schemas as well?
-            file_mds = self.metadata.get(container_type, {}).get('files', [])
+            file_mds = self.metadata.get(self.container_type, {}).get('files', [])
 
             for file_md in file_mds:
                 if file_md['name'] == info['name']:
diff --git a/api/upload.py b/api/upload.py
index 2d324f2a7..43121f80a 100644
--- a/api/upload.py
+++ b/api/upload.py
@@ -213,86 +213,23 @@ def engine(self):
 
             :query id: container_id
             :query job: job_id
 
-            :statuscode 400: describe me
-            :statuscode 402: describe me
-            :statuscode 404: describe me
+            :statuscode 400: improper or missing params
+            :statuscode 402: engine uploads must be from an authorized drone
         """
 
         if not self.superuser_request:
             self.abort(402, 'uploads must be from an authorized drone')
-
         level = self.get_param('level')
         if level is None:
-            self.abort(404, 'container level is required')
-
-        cont_id = self.get_param('id')
-        if not cont_id:
-            self.abort(404, 'container id is required')
+            self.abort(400, 'container level is required')
+        if level not in ['acquisition', 'session', 'project']:
+            self.abort(400, 'container level must be acquisition, session or project.')
+        cid = self.get_param('id')
+        if not cid:
+            self.abort(400, 'container id is required')
         else:
-            cont_id = bson.ObjectId(cont_id)
-        if level not in ['acquisition', 'analysis']:
-            self.abort(404, 'engine uploads are supported only at the acquisition or analysis level')
-
-        if level == 'analysis':
-            context = {'job_id': self.get_param('job')}
-            return process_upload(self.request, Strategy.analysis_job, origin=self.origin, container_type=level, id=cont_id, context=context)
-
-        if not self.superuser_request:
-            self.abort(402, 'uploads must be from an authorized drone')
-        with tempfile.TemporaryDirectory(prefix='.tmp', dir=config.get_item('persistent', 'data_path')) as tempdir_path:
-            try:
-                file_store = files.MultiFileStore(self.request, tempdir_path)
-            except files.FileStoreException as e:
-                self.abort(400, str(e))
-            if not file_store.metadata:
-                self.abort(400, 'metadata is missing')
-            payload_schema_uri = util.schema_uri('input', 'enginemetadata.json')
-            metadata_validator = validators.from_schema_path(payload_schema_uri)
-            metadata_validator(file_store.metadata, 'POST')
-            file_infos = file_store.metadata['acquisition'].pop('files', [])
-            now = datetime.datetime.utcnow()
-            try:
-                acquisition_obj = hierarchy.update_container_hierarchy(file_store.metadata, cont_id, level)
-            except APIStorageException as e:
-                self.abort(400, e.message)
-            # move the files before updating the database
-            for name, parsed_file in file_store.files.items():
-                fileinfo = parsed_file.info
-                target_path = os.path.join(config.get_item('persistent', 'data_path'), util.path_from_hash(fileinfo['hash']))
-                files.move_file(parsed_file.path, target_path)
-            # merge infos from the actual file and from the metadata
-            merged_files = hierarchy.merge_fileinfos(file_store.files, file_infos)
-            # update the fileinfo in mongo if a file already exists
-            for f in acquisition_obj['files']:
-                merged_file = merged_files.get(f['name'])
-                if merged_file:
-                    fileinfo = merged_file.info
-                    fileinfo['modified'] = now
-                    acquisition_obj = hierarchy.update_fileinfo('acquisitions', acquisition_obj['_id'], fileinfo)
-                    fileinfo['existing'] = True
-            # create the missing fileinfo in mongo
-            for name, merged_file in merged_files.items():
-                fileinfo = merged_file.info
-                # if the file exists we don't need to create it
-                # skip update fileinfo for files that don't have a path
-                if not fileinfo.get('existing') and merged_file.path:
-                    fileinfo['mimetype'] = fileinfo.get('mimetype') or util.guess_mimetype(name)
-                    fileinfo['created'] = now
-                    fileinfo['modified'] = now
-                    fileinfo['origin'] = self.origin
-                    acquisition_obj = hierarchy.add_fileinfo('acquisitions', acquisition_obj['_id'], fileinfo)
-
-            for f in acquisition_obj['files']:
-                if f['name'] in file_store.files:
-                    file_ = {
-                        'name': f['name'],
-                        'hash': f['hash'],
-                        'type': f.get('type'),
-                        'measurements': f.get('measurements', []),
-                        'mimetype': f.get('mimetype')
-                    }
-                    rules.create_jobs(config.db, acquisition_obj, 'acquisition', file_)
-        return [{'name': k, 'hash': v.info.get('hash'), 'size': v.info.get('size')} for k, v in merged_files.items()]
+            cid = bson.ObjectId(acquisition_id)
+        return process_upload(self.request, 'engine', container_type=level, id=cid, origin=self.origin)
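With the engine handler reduced to parameter checks plus process_upload, a drone-side request looks roughly like the sketch below; the host, container id and auth handling are illustrative, not part of this patch:

    import json
    import requests

    metadata = {'acquisition': {'label': 'scan 1', 'files': [{'name': 'one.csv'}]}}
    r = requests.post(
        'https://example.org/api/engine',  # illustrative host
        params={'level': 'acquisition', 'id': '573c9e6a844eac7fc01747cd'},  # illustrative id
        files={
            'file1': ('one.csv', open('one.csv', 'rb')),
            'metadata': ('', json.dumps(metadata)),  # metadata travels as a form field
        },
    )
    assert r.ok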
From a45223c6c63dd41786e4c3f41574ab2e64dc1942 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Mon, 20 Jun 2016 18:19:59 -0500
Subject: [PATCH 04/11] Clean up update hierarchy logic

---
 api/dao/hierarchy.py | 14 +++++++++-----
 api/upload.py        | 13 +++++++++----
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/api/dao/hierarchy.py b/api/dao/hierarchy.py
index 24879635f..d983b7203 100644
--- a/api/dao/hierarchy.py
+++ b/api/dao/hierarchy.py
@@ -342,33 +342,37 @@ def update_container_hierarchy(metadata, cid, container_type):
     c_metadata['modified'] = now
     c_obj = _update_container({'_id': cid}, {}, c_metadata, container_type)
     if c_obj is None:
-        raise APIStorageException('container doesn''t exist')
+        raise APIStorageException('container does not exist')
     if container_type in ['session', 'acquisition']:
         update_timestamp = True if c_metadata.get('timestamp') else False
         _update_hierarchy(c_obj, container_type, metadata, update_timestamp)
     return c_obj
 
 
-def _update_container(query, update, set_update, cont_name):
+def _update_container(query, update, set_update, container_type):
+    coll_name = container_type if container_type.endswith('s') else container_type+'s'
     update['$set'] = util.mongo_dict(set_update)
-    return config.db[cont_name].find_one_and_update(query,update,
+    return config.db[coll_name].find_one_and_update(query,update,
         return_document=pymongo.collection.ReturnDocument.AFTER
     )
 
 
 def _update_hierarchy(container, container_type, metadata, update_timestamp=False):
-    project_id = container['project'] # for sessions
+    project_id = container.get('project') # for sessions
     now = datetime.datetime.utcnow()
 
     if container_type == 'acquisition':
         update = {}
         session = metadata.get('session', {})
+        session_obj = None
         if update_timestamp:
             update['$min'] = dict(timestamp=container['timestamp'])
             session['timezone'] = dict(timezone=container.get('timezone'))
-        if session.keys():
+        if update.keys() or session.keys():
             session['modified'] = now
             session_obj = _update_container({'_id': container['session']}, update, session, 'sessions')
+        if session_obj is None:
+            session_obj = get_container('session', container['session'])
         project_id = session_obj['project']
 
     if project_id is None:
diff --git a/api/upload.py b/api/upload.py
index 43121f80a..063ff83f8 100644
--- a/api/upload.py
+++ b/api/upload.py
@@ -222,14 +222,19 @@ def engine(self):
         level = self.get_param('level')
         if level is None:
             self.abort(400, 'container level is required')
-        if level not in ['acquisition', 'session', 'project']:
-            self.abort(400, 'container level must be acquisition, session or project.')
+        if level not in ['analysis', 'acquisition', 'session', 'project']:
+            self.abort(400, 'container level must be analysis, acquisition, session or project.')
         cid = self.get_param('id')
         if not cid:
             self.abort(400, 'container id is required')
         else:
-            cid = bson.ObjectId(acquisition_id)
-        return process_upload(self.request, 'engine', container_type=level, id=cid, origin=self.origin)
+            cid = bson.ObjectId(cid)
+
+        if level == 'analysis':
+            context = {'job_id': self.get_param('job')}
+            return process_upload(self.request, Strategy.analysis_job, origin=self.origin, container_type=level, id=cont_id, context=context)
+        else:
+            return process_upload(self.request, Strategy.engine, container_type=level, id=cid, origin=self.origin)
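The pluralization guard added to _update_container maps container types onto Mongo collection names; note that a type already ending in 's' passes through unchanged, which is worth keeping in mind if 'analysis' ever reaches this path:

    def coll_name_for(container_type):
        # 'project' -> 'projects', 'session' -> 'sessions',
        # 'acquisition' -> 'acquisitions', but 'analysis' -> 'analysis'
        return container_type if container_type.endswith('s') else container_type + 's'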
From 882ca9cbbbbcba671fde09a2635347a5858c240e Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Tue, 21 Jun 2016 14:35:21 -0500
Subject: [PATCH 05/11] Add engine acquisition test

---
 test/integration_tests/test_uploads.py | 70 ++++++++++++++++++++------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index ca41671f5..01b9b6eb5 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -43,7 +43,7 @@ def teardown_db():
     return fixture_data
 
 @pytest.fixture()
-def with_hierarchyand_file_data(api_as_admin, bunch, request, data_builder):
+def with_hierarchy_and_file_data(api_as_admin, bunch, request, data_builder):
     group = data_builder.create_group('test_upload_' + str(int(time.time() * 1000)))
     project = data_builder.create_project(group)
     session = data_builder.create_session(project)
@@ -151,29 +151,71 @@ def test_label_upload(with_group_and_file_data, api_as_admin):
 
     r = api_as_admin.post('/upload/label', files=data.files)
     assert r.status_code == 400
 
-def test_acquisition_engine_upload(with_hierarchyand_file_data, api_as_admin):
-    data = with_group_and_file_data
+def find_file_in_array(filename, files):
+    for f in files:
+        if f.get('name') == filename:
+            return f
+
+def test_acquisition_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
     metadata = {
+        'project':{
+            'label': 'engine project',
+            'metadata': {'test': 'p'}
+        },
         'session':{
-            'label':'test_session',
-            'files':[
-                {
-                    'name':data.files.keys()[1]
-                }
-            ],
-            'subject': {'code': 'test_subject'}
+            'label': 'engine session',
+            'subject': {'code': 'engine subject'},
+            'metadata': {'test': 's'}
         },
         'acquisition':{
-            'label':'test_acquisition',
+            'label': 'engine acquisition',
+            'timestamp': '2016-06-20T21:57:36.636808+00:00'
+            'metadata': {'test': 'a'}
             'files':[
                 {
-                    'name':data.files.keys()[2]
+                    'name': data.files.keys()[0],
+                    'type': 'engine type 0',
+                    'metadata': {'test': 'f0'}
+                },
+                {
+                    'name': data.files.keys()[1],
+                    'type': 'engine type 1',
+                    'metadata': {'test': 'f1'}
                 }
             ]
         }
     }
-    metadata = json.dumps(metadata)
+    data.files['metadata'] = json.dumps(metadata)
 
-    r = api_as_admin.post('/upload/label', files=data.files)
+    r = api_as_admin.post('/engine?level=acquisition&id=data.acquisition', files=data.files)
     assert r.ok
+
+    r = api_as_admin.get('/projects/' + data.project)
+    assert r.ok
+    p = json.loads(r.content)
+    assert p['label'] == metadata['project']['label']
+    assert p['timestamp'] == metadata['acquisition']['timestamp']
+    assert cmp(p['metadata'], metadata['project']['metadata']) == 0
+
+    r = api_as_admin.get('/sessions/' + data.session)
+    assert r.ok
+    s = json.loads(r.content)
+    assert s['label'] == metadata['session']['label']
+    assert s['timestamp'] == metadata['acquisition']['timestamp']
+    assert cmp(s['metadata'], metadata['session']['metadata']) == 0
+    assert cmp(s['subject'], metadata['session']['subject']) == 0
+
+    r = api_as_admin.get('/acquisitions/' + data.acquisition)
+    assert r.ok
+    a = json.loads(r.content)
+    assert a['label'] == metadata['session']['label']
+    assert a['timestamp'] == metadata['acquisition']['timestamp']
+    assert cmp(a['metadata'], metadata['session']['metadata']) == 0
+
+    for f in a['files']:
+        mf = find_file_in_array(f['name'], metadata)
+        assert mf is not None
+        assert f['type'] == mf['type']
+        assert cmp(f['metadata'], mf['metadata']) == 0
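The per-file assertions above lean on the name-based match EnginePlacer.process_file_field performs for every uploaded file. Simplified (the real method also merges type, instrument, measurements, tags and metadata into the file info):

    def match_file_md(metadata, container_type, filename):
        for file_md in metadata.get(container_type, {}).get('files', []):
            if file_md.get('name') == filename:
                return file_md
        return None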
From b1824be1af05c77447ea9a0bac0c6f6ba9c5c553 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Mon, 27 Jun 2016 12:40:45 -0500
Subject: [PATCH 06/11] Add additional tests for engine placer

---
 api/placer.py                          |  10 +-
 api/schemas/input/enginemetadata.json  |   1 -
 api/upload.py                          |   3 +-
 test/integration_tests/test_uploads.py | 132 ++++++++++++++++++++++---
 4 files changed, 129 insertions(+), 17 deletions(-)

diff --git a/api/placer.py b/api/placer.py
index 298560d2d..a3a09bdc2 100644
--- a/api/placer.py
+++ b/api/placer.py
@@ -190,7 +190,10 @@ class EnginePlacer(Placer):
 
     def check(self):
         self.requireTarget()
-        validators.validate_data(self.metadata, 'enginemetadata.json', 'input', 'POST', optional=True)
+        if self.metadata is not None:
+            log.debug('about to validate')
+            validators.validate_data(self.metadata, 'enginemetadata.json', 'input', 'POST', optional=True)
+            log.debug('validated')
         self.saved = []
 
     def process_file_field(self, field, info):
@@ -215,8 +218,9 @@ def process_file_field(self, field, info):
     def finalize(self):
         # Updating various properties of the hierarchy; currently assumes acquisitions; might need fixing for other levels.
         # NOTE: only called in EnginePlacer
-        bid = bson.ObjectId(self.id)
-        self.obj = hierarchy.update_container_hierarchy(self.metadata, bid, self.container_type)
+        if self.metadata is not None:
+            bid = bson.ObjectId(self.id)
+            self.obj = hierarchy.update_container_hierarchy(self.metadata, bid, self.container_type)
 
         return self.saved
diff --git a/api/schemas/input/enginemetadata.json b/api/schemas/input/enginemetadata.json
index 47f66bf01..c400a9944 100644
--- a/api/schemas/input/enginemetadata.json
+++ b/api/schemas/input/enginemetadata.json
@@ -53,6 +53,5 @@
             "additionalProperties": false
         }
     },
-    "required": ["acquisition"],
     "additionalProperties": false
 }
diff --git a/api/upload.py b/api/upload.py
index 063ff83f8..89d7bb1af 100644
--- a/api/upload.py
+++ b/api/upload.py
@@ -134,7 +134,6 @@ def process_upload(request, strategy, container_type=None, id=None, origin=None,
     if placer.sse and not response:
         raise Exception("Programmer error: response required")
     elif placer.sse:
-        log.debug('SSE')
         response.headers['Content-Type'] = 'text/event-stream; charset=utf-8'
         response.headers['Connection'] = 'keep-alive'
         response.app_iter = placer.finalize()
@@ -232,7 +231,7 @@ def engine(self):
 
         if level == 'analysis':
             context = {'job_id': self.get_param('job')}
-            return process_upload(self.request, Strategy.analysis_job, origin=self.origin, container_type=level, id=cont_id, context=context)
+            return process_upload(self.request, Strategy.analysis_job, origin=self.origin, container_type=level, id=cid, context=context)
         else:
             return process_upload(self.request, Strategy.engine, container_type=level, id=cid, origin=self.origin)
diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index 01b9b6eb5..77cb95959 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -1,3 +1,5 @@
+import datetime
+import dateutil.parser
 import os
 import json
 import time
@@ -67,6 +69,7 @@ def teardown_db():
     fixture_data.project = project
     fixture_data.session = session
     fixture_data.acquisition = acquisition
+    fixture_data.files = files
     return fixture_data
 
@@ -171,8 +174,8 @@ def test_acquisition_engine_upload(with_hierarchy_and_file_data, api_as_admin):
         },
         'acquisition':{
             'label': 'engine acquisition',
-            'timestamp': '2016-06-20T21:57:36.636808+00:00'
-            'metadata': {'test': 'a'}
+            'timestamp': '2016-06-20T21:57:36+00:00',
+            'metadata': {'test': 'a'},
             'files':[
                 {
                     'name': data.files.keys()[0],
@@ -187,35 +190,142 @@ def test_acquisition_engine_upload(with_hierarchy_and_file_data, api_as_admin):
             ]
         }
     }
-    data.files['metadata'] = json.dumps(metadata)
+    data.files['metadata'] = ('', json.dumps(metadata))
 
-    r = api_as_admin.post('/engine?level=acquisition&id=data.acquisition', files=data.files)
+    r = api_as_admin.post('/engine?level=acquisition&id='+data.acquisition, files=data.files)
     assert r.ok
 
     r = api_as_admin.get('/projects/' + data.project)
     assert r.ok
     p = json.loads(r.content)
     assert p['label'] == metadata['project']['label']
-    assert p['timestamp'] == metadata['acquisition']['timestamp']
     assert cmp(p['metadata'], metadata['project']['metadata']) == 0
 
     r = api_as_admin.get('/sessions/' + data.session)
     assert r.ok
     s = json.loads(r.content)
     assert s['label'] == metadata['session']['label']
-    assert s['timestamp'] == metadata['acquisition']['timestamp']
     assert cmp(s['metadata'], metadata['session']['metadata']) == 0
-    assert cmp(s['subject'], metadata['session']['subject']) == 0
+    assert s['subject']['code'] == metadata['session']['subject']['code']
 
     r = api_as_admin.get('/acquisitions/' + data.acquisition)
     assert r.ok
     a = json.loads(r.content)
-    assert a['label'] == metadata['session']['label']
-    assert a['timestamp'] == metadata['acquisition']['timestamp']
-    assert cmp(a['metadata'], metadata['session']['metadata']) == 0
+    assert a['label'] == metadata['acquisition']['label']
+    a_timestamp = dateutil.parser.parse(a['timestamp'])
+    m_timestamp = dateutil.parser.parse(metadata['acquisition']['timestamp'])
+    assert a_timestamp == m_timestamp
+    assert cmp(a['metadata'], metadata['acquisition']['metadata']) == 0
 
     for f in a['files']:
-        mf = find_file_in_array(f['name'], metadata)
+        mf = find_file_in_array(f['name'], metadata['acquisition']['files'])
         assert mf is not None
         assert f['type'] == mf['type']
         assert cmp(f['metadata'], mf['metadata']) == 0
+
+def test_session_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
+    metadata = {
+        'project':{
+            'label': 'engine project',
+            'metadata': {'test': 'p'}
+        },
+        'session':{
+            'label': 'engine session',
+            'subject': {'code': 'engine subject'},
+            'timestamp': '2016-06-20T21:57:36+00:00',
+            'metadata': {'test': 's'},
+            'files':[
+                {
+                    'name': data.files.keys()[0],
+                    'type': 'engine type 0',
+                    'metadata': {'test': 'f0'}
+                },
+                {
+                    'name': data.files.keys()[1],
+                    'type': 'engine type 1',
+                    'metadata': {'test': 'f1'}
+                }
+            ]
+        }
+    }
+    data.files['metadata'] = ('', json.dumps(metadata))
+
+    r = api_as_admin.post('/engine?level=session&id='+data.session, files=data.files)
+    assert r.ok
+
+    r = api_as_admin.get('/projects/' + data.project)
+    assert r.ok
+    p = json.loads(r.content)
+    assert p['label'] == metadata['project']['label']
+    assert cmp(p['metadata'], metadata['project']['metadata']) == 0
+
+    r = api_as_admin.get('/sessions/' + data.session)
+    assert r.ok
+    s = json.loads(r.content)
+    assert s['label'] == metadata['session']['label']
+    assert cmp(s['metadata'], metadata['session']['metadata']) == 0
+    assert s['subject']['code'] == metadata['session']['subject']['code']
+    s_timestamp = dateutil.parser.parse(s['timestamp'])
+    m_timestamp = dateutil.parser.parse(metadata['session']['timestamp'])
+    assert s_timestamp == m_timestamp
+
+    for f in s['files']:
+        mf = find_file_in_array(f['name'], metadata['session']['files'])
+        assert mf is not None
+        assert f['type'] == mf['type']
+        assert cmp(f['metadata'], mf['metadata']) == 0
+
+def test_project_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
+    metadata = {
+        'project':{
+            'label': 'engine project',
+            'metadata': {'test': 'p'},
+            'files':[
+                {
+                    'name': data.files.keys()[0],
+                    'type': 'engine type 0',
+                    'metadata': {'test': 'f0'}
+                },
+                {
+                    'name': data.files.keys()[1],
+                    'type': 'engine type 1',
+                    'metadata': {'test': 'f1'}
+                }
+            ]
+        }
+    }
+    data.files['metadata'] = ('', json.dumps(metadata))
+
+    r = api_as_admin.post('/engine?level=project&id='+data.project, files=data.files)
+    assert r.ok
+
+    r = api_as_admin.get('/projects/' + data.project)
+    assert r.ok
+    p = json.loads(r.content)
+    assert p['label'] == metadata['project']['label']
+    assert cmp(p['metadata'], metadata['project']['metadata']) == 0
+
+    for f in p['files']:
+        mf = find_file_in_array(f['name'], metadata['project']['files'])
+        assert mf is not None
+        assert f['type'] == mf['type']
+        assert cmp(f['metadata'], mf['metadata']) == 0
+
+def test_acquisition_file_only_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
+
+    r = api_as_admin.post('/engine?level=acquisition&id='+data.acquisition, files=data.files)
+    assert r.ok
+
+    r = api_as_admin.get('/acquisitions/' + data.acquisition)
+    assert r.ok
+    a = json.loads(r.content)
+
+    for k,v in data.files.items():
+        mf = find_file_in_array(v[0], a['files'])
+        assert mf is not None
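For context on the r.ok checks: process_upload answers with the placer's saved list, so a successful engine upload returns the stored file infos. For the two-file fixture the response body is shaped roughly like the following (hash and size values illustrative):

    [
        {'name': 'one.csv', 'size': 38, 'hash': 'v0-sha384-...', 'type': 'engine type 0'},
        {'name': 'two.csv', 'size': 38, 'hash': 'v0-sha384-...', 'type': 'engine type 1'},
    ]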
From 4f8aa48276556942c01eb167b0834a8c73c9ee96 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Tue, 28 Jun 2016 15:03:27 -0500
Subject: [PATCH 07/11] Add test for metadata without files

---
 test/integration_tests/test_uploads.py | 47 ++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index 77cb95959..7c46c6b90 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -329,3 +329,50 @@ def test_acquisition_file_only_engine_upload(with_hierarchy_and_file_data, api_a
     for k,v in data.files.items():
         mf = find_file_in_array(v[0], a['files'])
         assert mf is not None
+
+def test_acquisition_metadata_only_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
+    metadata = {
+        'project':{
+            'label': 'engine project',
+            'metadata': {'test': 'p'}
+        },
+        'session':{
+            'label': 'engine session',
+            'subject': {'code': 'engine subject'},
+            'metadata': {'test': 's'}
+        },
+        'acquisition':{
+            'label': 'engine acquisition',
+            'timestamp': '2016-06-20T21:57:36+00:00',
+            'metadata': {'test': 'a'}
+        }
+    }
+    data.files = {}
+    data.files['metadata'] = ('', json.dumps(metadata))
+
+    r = api_as_admin.post('/engine?level=acquisition&id='+data.acquisition, files=data.files)
+    assert r.ok
+
+    r = api_as_admin.get('/projects/' + data.project)
+    assert r.ok
+    p = json.loads(r.content)
+    assert p['label'] == metadata['project']['label']
+    assert cmp(p['metadata'], metadata['project']['metadata']) == 0
+
+    r = api_as_admin.get('/sessions/' + data.session)
+    assert r.ok
+    s = json.loads(r.content)
+    assert s['label'] == metadata['session']['label']
+    assert cmp(s['metadata'], metadata['session']['metadata']) == 0
+    assert s['subject']['code'] == metadata['session']['subject']['code']
+
+    r = api_as_admin.get('/acquisitions/' + data.acquisition)
+    assert r.ok
+    a = json.loads(r.content)
+    assert a['label'] == metadata['acquisition']['label']
+    a_timestamp = dateutil.parser.parse(a['timestamp'])
+    m_timestamp = dateutil.parser.parse(metadata['acquisition']['timestamp'])
+    assert a_timestamp == m_timestamp
+    assert cmp(a['metadata'], metadata['acquisition']['metadata']) == 0

From 833a91f194c883d96e73d5432baa6b813a90090d Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Tue, 28 Jun 2016 15:14:27 -0500
Subject: [PATCH 08/11] Cleanup debug logs

---
 api/placer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/api/placer.py b/api/placer.py
index a3a09bdc2..43b338a04 100644
--- a/api/placer.py
+++ b/api/placer.py
@@ -191,9 +191,7 @@ class EnginePlacer(Placer):
     def check(self):
         self.requireTarget()
         if self.metadata is not None:
-            log.debug('about to validate')
             validators.validate_data(self.metadata, 'enginemetadata.json', 'input', 'POST', optional=True)
-            log.debug('validated')
         self.saved = []
From 09b163f3c70eea369bc2f04c4501eb23e70c0454 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Tue, 5 Jul 2016 11:31:07 -0500
Subject: [PATCH 09/11] Only explicitly set timestamp

---
 api/dao/hierarchy.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/api/dao/hierarchy.py b/api/dao/hierarchy.py
index d983b7203..fee634338 100644
--- a/api/dao/hierarchy.py
+++ b/api/dao/hierarchy.py
@@ -344,8 +344,7 @@ def update_container_hierarchy(metadata, cid, container_type):
     if c_obj is None:
         raise APIStorageException('container does not exist')
     if container_type in ['session', 'acquisition']:
-        update_timestamp = True if c_metadata.get('timestamp') else False
-        _update_hierarchy(c_obj, container_type, metadata, update_timestamp)
+        _update_hierarchy(c_obj, container_type, metadata)
     return c_obj
 
@@ -357,34 +356,26 @@ def _update_container(query, update, set_update, container_type):
 
-def _update_hierarchy(container, container_type, metadata, update_timestamp=False):
+def _update_hierarchy(container, container_type, metadata):
     project_id = container.get('project') # for sessions
     now = datetime.datetime.utcnow()
 
     if container_type == 'acquisition':
-        update = {}
         session = metadata.get('session', {})
         session_obj = None
-        if update_timestamp:
-            update['$min'] = dict(timestamp=container['timestamp'])
-            session['timezone'] = dict(timezone=container.get('timezone'))
-        if update.keys() or session.keys():
+        if session.keys():
             session['modified'] = now
-            session_obj = _update_container({'_id': container['session']}, update, session, 'sessions')
+            session_obj = _update_container({'_id': container['session']}, {}, session, 'sessions')
         if session_obj is None:
            session_obj = get_container('session', container['session'])
         project_id = session_obj['project']
 
     if project_id is None:
         raise APIStorageException('Failed to find project id in session obj')
-    update = {}
     project = metadata.get('project', {})
-    if update_timestamp:
-        update['$max'] = dict(timestamp=container['timestamp'])
-        project['timezone'] = dict(timezone=container.get('timezone'))
     if project.keys():
         project['modified'] = now
-        project_obj = _update_container({'_id': project_id}, update, project, 'projects')
+        project_obj = _update_container({'_id': project_id}, {}, project, 'projects')
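The removed branch had been synthesizing parent updates from the acquisition itself. The net effect on the session update document, sketched with illustrative values:

    # Before this patch, an acquisition timestamp propagated implicitly:
    before = {'$min': {'timestamp': 'acq timestamp'},
              '$set': {'timezone': '...', 'modified': '...'}}
    # After it, a parent changes only when the engine metadata addresses it:
    after = {'$set': {'label': 'engine session', 'modified': '...'}}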
From bd06eeb6f2aed76b26ec94a6c7136a5041fb3018 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Wed, 6 Jul 2016 12:14:57 -0500
Subject: [PATCH 10/11] Add test for subsequent file upload

---
 test/integration_tests/test_uploads.py | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index 7c46c6b90..9b3e3251e 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -330,6 +330,61 @@ def test_acquisition_file_only_engine_upload(with_hierarchy_and_file_data, api_a
         mf = find_file_in_array(v[0], a['files'])
         assert mf is not None
 
+def test_acquisition_subsequent_file_engine_upload(with_hierarchy_and_file_data, api_as_admin):
+
+    data = with_hierarchy_and_file_data
+
+    filedata_1 = {}
+    filedata_1['file1'] = ('file-one.csv', 'some,data,to,send\nanother,row,to,send\n')
+    filedata_1['metadata'] = ('', json.dumps({
+        'acquisition':{
+            'files':[
+                {
+                    'name': 'file-one.csv',
+                    'type': 'engine type 1',
+                    'metadata': {'test': 'f1'}
+                }
+            ]
+        }
+    }))
+
+    r = api_as_admin.post('/engine?level=acquisition&id='+data.acquisition, files=filedata_1)
+    assert r.ok
+
+    r = api_as_admin.get('/acquisitions/' + data.acquisition)
+    assert r.ok
+    a = json.loads(r.content)
+
+    mf = find_file_in_array('file-one.csv', a['files'])
+    assert mf is not None
+
+    filedata_2 = {}
+    filedata_2['file1'] = ('file-two.csv', 'some,data,to,send\nanother,row,to,send\n')
+    filedata_2['metadata'] = ('', json.dumps({
+        'acquisition':{
+            'files':[
+                {
+                    'name': 'file-two.csv',
+                    'type': 'engine type 1',
+                    'metadata': {'test': 'f1'}
+                }
+            ]
+        }
+    }))
+
+    r = api_as_admin.post('/engine?level=acquisition&id='+data.acquisition, files=filedata_2)
+    assert r.ok
+
+    r = api_as_admin.get('/acquisitions/' + data.acquisition)
+    assert r.ok
+    a = json.loads(r.content)
+
+    # Assert both files are still present after upload
+    mf = find_file_in_array('file-one.csv', a['files'])
+    assert mf is not None
+    mf = find_file_in_array('file-two.csv', a['files'])
+    assert mf is not None
+
 def test_acquisition_metadata_only_engine_upload(with_hierarchy_and_file_data, api_as_admin):
 
     data = with_hierarchy_and_file_data
From 740b5313f1b2d1c39b7fd5c1b269c6fb43c6c203 Mon Sep 17 00:00:00 2001
From: Megan Henning
Date: Wed, 6 Jul 2016 13:50:25 -0500
Subject: [PATCH 11/11] Fix bug with file metadata

---
 api/placer.py                          | 20 +++++++----------
 api/upload.py                          | 31 ++++++++++++++++++++------
 test/integration_tests/test_uploads.py | 16 ++++++-------
 3 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/api/placer.py b/api/placer.py
index 43b338a04..3aba5648b 100644
--- a/api/placer.py
+++ b/api/placer.py
@@ -184,8 +184,9 @@ class LabelPlacer(UIDPlacer):
 
 class EnginePlacer(Placer):
     """
-    A placer that can accept files sent to it from an engine.
-    Currently a stub.
+    A placer that can accept files and/or metadata sent to it from the engine.
+
+    It uses update_container_hierarchy to update the container and its parents' fields from the metadata.
     """
 
     def check(self):
@@ -196,28 +197,23 @@ def check(self):
 
     def process_file_field(self, field, info):
         if self.metadata is not None:
-            # OPPORTUNITY: hard-coded container levels will need to go away soon
-            # Engine shouldn't know about container names; maybe parent contexts?
-            # How will this play with upload unification? Unify schemas as well?
             file_mds = self.metadata.get(self.container_type, {}).get('files', [])
 
             for file_md in file_mds:
                 if file_md['name'] == info['name']:
+                    info.update(file_md)
                     break
-            else:
-                file_md = {}
-
-            for x in ('type', 'instrument', 'measurements', 'tags', 'metadata'):
-                info[x] = file_md.get(x) or info[x]
 
         self.save_file(field, info)
         self.saved.append(info)
 
     def finalize(self):
-        # Updating various properties of the hierarchy; currently assumes acquisitions; might need fixing for other levels.
-        # NOTE: only called in EnginePlacer
         if self.metadata is not None:
             bid = bson.ObjectId(self.id)
+
+            # Remove file metadata as it was already updated in process_file_field
+            for k in self.metadata.keys():
+                self.metadata[k].pop('files', {})
+
             self.obj = hierarchy.update_container_hierarchy(self.metadata, bid, self.container_type)
 
         return self.saved
diff --git a/api/upload.py b/api/upload.py
index 89d7bb1af..02604acfe 100644
--- a/api/upload.py
+++ b/api/upload.py
@@ -206,14 +206,31 @@ def engine(self):
         """
         .. http:post:: /api/engine
 
-            Confirm endpoint is ready for requests
+            Default behavior:
+                Uploads a list of files sent as file1, file2, etc to an existing
+                container and updates fields of the files, the container and its
+                parents as specified in the ``metadata`` form field using the
+                engine placer class
+            When ``level`` is ``analysis``:
+                Uploads a list of files to an existing analysis object, marking
+                all files as ``output=true`` using the job-based analyses placer
+                class
+
+            :param level: one of ``project``, ``session``, ``acquisition``, ``analysis``
+            :type level: string
+
+            :param id: Container ID
+            :type id: string
+
+            :param job: Job ID
+            :type job: string
+
+            :statuscode 200: no error
+            :statuscode 400: Target container ``level`` is required
+            :statuscode 400: Level must be ``project``, ``session``, ``acquisition``, ``analysis``
+            :statuscode 400: Target container ``id`` is required
+            :statuscode 402: Uploads must be from an authorized drone
 
-            :query level: container_type
-            :query id: container_id
-            :query job: job_id
-
-            :statuscode 400: improper or missing params
-            :statuscode 402: engine uploads must be from an authorized drone
         """
 
         if not self.superuser_request:
diff --git a/test/integration_tests/test_uploads.py b/test/integration_tests/test_uploads.py
index 9b3e3251e..957b8f07a 100644
--- a/test/integration_tests/test_uploads.py
+++ b/test/integration_tests/test_uploads.py
@@ -178,12 +178,12 @@ def test_acquisition_engine_upload(with_hierarchy_and_file_data, api_as_admin):
             'metadata': {'test': 'a'},
             'files':[
                 {
-                    'name': data.files.keys()[0],
+                    'name': 'one.csv',
                     'type': 'engine type 0',
                     'metadata': {'test': 'f0'}
                 },
                 {
-                    'name': data.files.keys()[1],
+                    'name': 'two.csv',
                     'type': 'engine type 1',
                     'metadata': {'test': 'f1'}
                 }
@@ -236,14 +236,14 @@ def test_session_engine_upload(with_hierarchy_and_file_data, api_as_admin):
             'subject': {'code': 'engine subject'},
             'timestamp': '2016-06-20T21:57:36+00:00',
             'metadata': {'test': 's'},
-            'files':[
+            'files': [
                 {
-                    'name': data.files.keys()[0],
+                    'name': 'one.csv',
                     'type': 'engine type 0',
                     'metadata': {'test': 'f0'}
                 },
                 {
-                    'name': data.files.keys()[1],
+                    'name': 'two.csv',
                     'type': 'engine type 1',
                     'metadata': {'test': 'f1'}
                 }
@@ -284,14 +284,14 @@ def test_project_engine_upload(with_hierarchy_and_file_data, api_as_admin):
         'project':{
             'label': 'engine project',
             'metadata': {'test': 'p'},
-            'files':[
+            'files': [
                 {
-                    'name': data.files.keys()[0],
+                    'name': 'one.csv',
                    'type': 'engine type 0',
                     'metadata': {'test': 'f0'}
                 },
                 {
-                    'name': data.files.keys()[1],
+                    'name': 'two.csv',
                     'type': 'engine type 1',
                     'metadata': {'test': 'f1'}
                 }