From 26a9f75159a9c80ffa8109df1f9a10bd76663c97 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 5 Nov 2025 14:33:21 -0700 Subject: [PATCH 01/11] TST: study associations endpoint test --- .../test/rest/test_study_associations.py | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 qiita_pet/test/rest/test_study_associations.py diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py new file mode 100644 index 000000000..ea2053f16 --- /dev/null +++ b/qiita_pet/test/rest/test_study_associations.py @@ -0,0 +1,151 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from unittest import main + +from tornado.escape import json_decode + +from qiita_db.study import Study +from qiita_pet.test.rest.test_base import RESTHandlerTestCase + + +class StudyAssociationTests(RESTHandlerTestCase): + def test_get_valid(self): + IGNORE = IGNORE + exp = {'study': 1, + 'prep_templates': [{'prep_id': 1, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': 'The greatest human filtering method', + 'prep_artifacts': [{'artifact_id': 1, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 1, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_forward_seqs'}, + {'artifact_filepath_id': 2, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_barcodes'}]}, + {'artifact_id': 2, + 'artifact_parent_ids': None, + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split 
libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': [{'artifact_filepath_id': 3, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'preprocessed_fasta'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 4, + 'artifact_filepath_type': 'preprocessed_fastq'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 5, + 'artifact_filepath_type': 'preprocessed_demux'}]}, + {'artifact_id': 3, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': []}, + {'artifact_id': 4, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 5, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 
'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 6, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '2', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 12, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}, + {'prep_id': 2, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': None, + 'prep_artifacts': [{'artifact_id': 7, + 'artifact_parent_ids': [], + 'artifact_basal_id': 7, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 22, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}]} + + response = self.get('/api/v1/study-association/1', headers=self.headers) + self.assertEqual(response.code, 200) + obs = json_decode(response.body) + self.assertEqual(obs, exp) + + def test_get_invalid(self): + response = self.get('/api/v1/study-association/0', headers=self.headers) + self.assertEqual(response.code, 404) + self.assertEqual(json_decode(response.body), + {'message': 'Study not found'}) + + +if __name__ == '__main__': + main() From 4c1e91f65b40747e26e1a4baaf66c467dbdc1632 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 5 Nov 2025 14:33:45 -0700 Subject: [PATCH 02/11] MAINT: be defensive on artifact prep association expectations --- qiita_db/artifact.py | 18 ++++++++++++++++-- 1 file changed, 16 
insertions(+), 2 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index bf81ddf41..ea40e3bef 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1463,8 +1463,22 @@ def prep_templates(self): FROM qiita.preparation_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [qdb.metadata_template.prep_template.PrepTemplate(pt_id) - for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] + templates = [qdb.metadata_template.prep_template.PrepTemplate(pt_id) + for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] + + if len(templates) > 1: + # We never expect an artifact to be associated with multiple + # preparations + ids = [p.id for p in templates] + msg = f"Artifact({self.id}) associated with preps: {sorted(ids)}" + raise ValueError(msg) + + if len(templates) == 0: + # An artifact must be associated with a template + msg = f"Artifact({self.id}) is not associated with a template" + raise ValueError(msg) + + return templates @property def study(self): From 6bb87290c948b2cb86f3eae4b1ef0ef88814fb80 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 6 Nov 2025 16:20:28 -0700 Subject: [PATCH 03/11] API: add /api/v1/study/{study_id}/associations to retrieve comprehensive id, path, processing information for a study --- qiita_pet/handlers/rest/__init__.py | 2 + qiita_pet/handlers/rest/study_association.py | 195 ++++++++++++++++++ .../test/rest/test_study_associations.py | 61 ++++-- 3 files changed, 243 insertions(+), 15 deletions(-) create mode 100644 qiita_pet/handlers/rest/study_association.py diff --git a/qiita_pet/handlers/rest/__init__.py b/qiita_pet/handlers/rest/__init__.py index 73ad9382a..913758457 100644 --- a/qiita_pet/handlers/rest/__init__.py +++ b/qiita_pet/handlers/rest/__init__.py @@ -7,6 +7,7 @@ # ----------------------------------------------------------------------------- from .study import StudyHandler, StudyCreatorHandler, StudyStatusHandler +from .study_association import 
StudyAssociationHandler from .study_samples import (StudySamplesHandler, StudySamplesInfoHandler, StudySamplesCategoriesHandler, StudySamplesDetailHandler, @@ -25,6 +26,7 @@ ENDPOINTS = ( (r"/api/v1/study$", StudyCreatorHandler), (r"/api/v1/study/([0-9]+)$", StudyHandler), + (r"/api/v1/study/([0-9]+)/associations$", StudyAssociationHandler), (r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)", StudySamplesCategoriesHandler), (r"/api/v1/study/([0-9]+)/samples", StudySamplesHandler), diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py new file mode 100644 index 000000000..525a42a14 --- /dev/null +++ b/qiita_pet/handlers/rest/study_association.py @@ -0,0 +1,195 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ----------------------------------------------------------------------------- +import warnings + +from tornado.escape import json_decode + +from qiita_db.handlers.oauth2 import authenticate_oauth +from qiita_db.study import StudyPerson, Study +from qiita_db.user import User +from .rest_handler import RESTHandler +from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS + + +# terms used more than once +_STUDY = 'study' +_PREP = 'prep' +_FILEPATH = 'filepath' +_STATUS = 'status' +_ARTIFACT = 'artifact' +_SAMPLE = 'sample' +_METADATA = 'metadata' +_TEMPLATE = 'template' +_ID = 'id' +_PROCESSING = 'processing' +_TYPE = 'type' + +# payload keys +STUDY_ID = f'{_STUDY}_{_ID}' +STUDY_SAMPLE_METADATA_FILEPATH = f'{_STUDY}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' +PREP_TEMPLATES = f'{_PREP}_{_TEMPLATE}s' +PREP_ID = f'{_PREP}_{_ID}' +PREP_STATUS = f'{_PREP}_{_STATUS}' +PREP_SAMPLE_METADATA_FILEPATH = f'{_PREP}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' +PREP_DATA_TYPE = f'{_PREP}_data_{_TYPE}' +PREP_HUMAN_FILTERING = f'{_PREP}_human_filtering' +PREP_ARTIFACTS = f'{_PREP}_{_ARTIFACT}s' +ARTIFACT_ID = f'{_ARTIFACT}_{_ID}' +ARTIFACT_STATUS = f'{_ARTIFACT}_{_STATUS}' +ARTIFACT_PARENT_IDS = f'{_ARTIFACT}_parent_{_ID}s' +ARTIFACT_BASAL_ID = f'{_ARTIFACT}_basal_{_ID}' +ARTIFACT_PROCESSING_ID = f'{_ARTIFACT}_{_PROCESSING}_{_ID}' +ARTIFACT_PROCESSING_NAME = f'{_ARTIFACT}_{_PROCESSING}_name' +ARTIFACT_PROCESSING_ARGUMENTS = f'{_ARTIFACT}_{_PROCESSING}_arguments' +ARTIFACT_FILEPATHS = f'{_ARTIFACT}_{_FILEPATH}s' +ARTIFACT_FILEPATH = f'{_ARTIFACT}_{_FILEPATH}' +ARTIFACT_FILEPATH_TYPE = f'{_ARTIFACT}_{_FILEPATH}_{_TYPE}' +ARTIFACT_FILEPATH_ID = f'{_ARTIFACT}_{_FILEPATH}_{_ID}' + + +def _most_recent_template_path(template): + filepaths = template.get_filepaths() + + # the test dataset shows that a prep can exist without a prep template + if len(filepaths) == 0: + return None + + metadata_paths = sorted(filepaths, reverse=True) + + # [0] -> the highest file by ID + # [1] -> the 
filepath + return metadata_paths[0][1] + + +def _set_study(payload, study): + filepath = _most_recent_template_path(study.sample_template) + + payload[STUDY_ID] = study.id + payload[STUDY_SAMPLE_METADATA_FILEPATH] = filepath + + +def _set_prep_templates(payload, study): + template_data = [] + for pt in study.prep_templates(): + _set_prep_template(template_data, pt) + payload[PREP_TEMPLATES] = template_data + + +def _get_human_filtering(prep_template): + # .current_human_filtering does not describe what the human filter is + if prep_template.artifact is not None: + return prep_template.artifact.human_reads_filter_method + + +def _set_prep_template(template_payload, prep_template): + filepath = _most_recent_template_path(prep_template) + + current_template = {} + current_template[PREP_ID] = prep_template.id + current_template[PREP_STATUS] = prep_template.status + current_template[PREP_SAMPLE_METADATA_FILEPATH] = filepath + current_template[PREP_DATA_TYPE] = prep_template.data_type() + current_template[PREP_HUMAN_FILTERING] = _get_human_filtering(prep_template) + + _set_artifacts(current_template, prep_template) + + template_payload.append(current_template) + + +def _get_artifacts(prep_template): + pending_artifact_objects = [prep_template.artifact, ] + all_artifact_objects = set(pending_artifact_objects[:]) + + while pending_artifact_objects: + artifact = pending_artifact_objects.pop() + pending_artifact_objects.extend(artifact.children) + all_artifact_objects.update(set(artifact.children)) + + return sorted(all_artifact_objects, key=lambda artifact: artifact.id) + + +def _set_artifacts(template_payload, prep_template): + prep_artifacts = [] + + if prep_template.artifact is None: + basal_id = None + else: + basal_id = prep_template.artifact.id + + for artifact in _get_artifacts(prep_template): + _set_artifact(prep_artifacts, artifact, basal_id) + template_payload[PREP_ARTIFACTS] = prep_artifacts + + +def _set_artifact(prep_artifacts, artifact, basal_id): + 
artifact_payload = {} + artifact_payload[ARTIFACT_ID] = artifact.id + + # Prep uses .status, artifact uses .visibility + # favoring .status as visibility implies a UI + artifact_payload[ARTIFACT_STATUS] = artifact.visibility + + parents = [parent.id for parent in artifact.parents] + artifact_payload[ARTIFACT_PARENT_IDS] = parents if parents else None + artifact_payload[ARTIFACT_BASAL_ID] = basal_id + + _set_artifact_processing(artifact_payload, artifact) + _set_artifact_filepaths(artifact_payload, artifact) + + prep_artifacts.append(artifact_payload) + + +def _set_artifact_processing(artifact_payload, artifact): + processing_parameters = artifact.processing_parameters + if processing_parameters is None: + artifact_processing_id = None + artifact_processing_name = None + artifact_processing_arguments = None + else: + command = processing_parameters.command + artifact_processing_id = command.id + artifact_processing_name = command.name + artifact_processing_arguments = processing_parameters.values + + artifact_payload[ARTIFACT_PROCESSING_ID] = artifact_processing_id + artifact_payload[ARTIFACT_PROCESSING_NAME] = artifact_processing_name + artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments + + +def _set_artifact_filepaths(artifact_payload, artifact): + artifact_filepaths = [] + for filepath_data in artifact.filepaths: + local_payload = {} + local_payload[ARTIFACT_FILEPATH] = filepath_data['fp'] + local_payload[ARTIFACT_FILEPATH_ID] = filepath_data['fp_id'] + local_payload[ARTIFACT_FILEPATH_TYPE] = filepath_data['fp_type'] + artifact_filepaths.append(local_payload) + + # the test study includes an artifact which does not have filepaths + if len(artifact_filepaths) == 0: + artifact_filepaths = None + + artifact_payload[ARTIFACT_FILEPATHS] = artifact_filepaths + + +class StudyAssociationHandler(RESTHandler): + @authenticate_oauth + def get(self, study_id): + study = self.safe_get_study(study_id) + if study is None: + return + + payload = {} + 
_set_study(payload, study) + _set_prep_templates(payload, study) + self.write(payload) + self.finish() + + + # get all the things + diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index ea2053f16..4d2adbc99 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -16,14 +16,17 @@ class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = IGNORE - exp = {'study': 1, + IGNORE = 'IGNORE' + exp = {'study_id': 1, + 'study_sample_metadata_filepath': IGNORE, 'prep_templates': [{'prep_id': 1, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': 'The greatest human filtering method', 'prep_artifacts': [{'artifact_id': 1, - 'artifact_parent_ids': [1], + 'artifact_status': 'private', + 'artifact_parent_ids': None, 'artifact_basal_id': 1, 'artifact_processing_id': None, 'artifact_processing_name': None, @@ -35,7 +38,8 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'raw_barcodes'}]}, {'artifact_id': 2, - 'artifact_parent_ids': None, + 'artifact_status': 'private', + 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, 'artifact_processing_name': 'Split libraries FASTQ', @@ -60,6 +64,7 @@ def test_get_valid(self): 'artifact_filepath_id': 5, 'artifact_filepath_type': 'preprocessed_demux'}]}, {'artifact_id': 3, + 'artifact_status': 'private', 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, @@ -69,14 +74,15 @@ def test_get_valid(self): 'min_per_read_length_fraction': '0.75', 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', + 'rev_comp_mapping_barcodes': 'True', 'rev_comp': 'False', 'phred_quality_threshold': '3', 'barcode_type': 'golay_12', 'max_barcode_errors': '1.5', 
'phred_offset': 'auto'}, - 'artifact_filepaths': []}, + 'artifact_filepaths': None}, {'artifact_id': 4, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -92,6 +98,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 5, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -107,6 +114,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 6, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -122,12 +130,14 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}, {'prep_id': 2, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': None, 'prep_artifacts': [{'artifact_id': 7, - 'artifact_parent_ids': [], + 'artifact_parent_ids': None, 'artifact_basal_id': 7, + 'artifact_status': 'private', 'artifact_processing_id': None, 'artifact_processing_name': None, 'artifact_processing_arguments': None, @@ -135,16 +145,37 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}]} - response = self.get('/api/v1/study-association/1', headers=self.headers) + response = self.get('/api/v1/study/1/associations', headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) + + def _process_dict(d): + return [(d, k) for k in d] + + def _process_list(l): + if l is None: + return [] + + return [dk for d in l + for dk in _process_dict(d)] + + stack = _process_dict(obs) + while stack: + (d, k) = stack.pop() + if k.endswith('filepath'): + d[k] = IGNORE + elif k.endswith('filepaths'): + stack.extend(_process_list(d[k])) + elif k.endswith('templates'): + 
stack.extend(_process_list(d[k])) + elif k.endswith('artifacts'): + stack.extend(_process_list(d[k])) + self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study-association/0', headers=self.headers) + response = self.get('/api/v1/study/0/associations', headers=self.headers) self.assertEqual(response.code, 404) - self.assertEqual(json_decode(response.body), - {'message': 'Study not found'}) if __name__ == '__main__': From 058fb248947c337dc8544846503496c97b69b3fa Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 6 Nov 2025 16:40:13 -0700 Subject: [PATCH 04/11] DOC: note schema, add some additional doc strings and comments --- qiita_pet/handlers/rest/study_association.py | 37 +++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index 525a42a14..1b67d3a54 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -53,6 +53,7 @@ def _most_recent_template_path(template): + """Obtain the most recent available template filepath""" filepaths = template.get_filepaths() # the test dataset shows that a prep can exist without a prep template @@ -67,6 +68,7 @@ def _most_recent_template_path(template): def _set_study(payload, study): + """Set study level information""" filepath = _most_recent_template_path(study.sample_template) payload[STUDY_ID] = study.id @@ -74,6 +76,7 @@ def _set_study(payload, study): def _set_prep_templates(payload, study): + """Set prep template level information""" template_data = [] for pt in study.prep_templates(): _set_prep_template(template_data, pt) @@ -81,12 +84,15 @@ def _set_prep_templates(payload, study): def _get_human_filtering(prep_template): + """Obtain the human filtering if applied""" # .current_human_filtering does not describe what the human filter is + # so we will examine the first artifact off the prep if 
prep_template.artifact is not None: return prep_template.artifact.human_reads_filter_method def _set_prep_template(template_payload, prep_template): + """Set an individual prep template information""" filepath = _most_recent_template_path(prep_template) current_template = {} @@ -102,6 +108,7 @@ def _set_prep_template(template_payload, prep_template): def _get_artifacts(prep_template): + """Get artifact information associated with a prep""" pending_artifact_objects = [prep_template.artifact, ] all_artifact_objects = set(pending_artifact_objects[:]) @@ -114,6 +121,7 @@ def _get_artifacts(prep_template): def _set_artifacts(template_payload, prep_template): + """Set artifact information specific to a prep""" prep_artifacts = [] if prep_template.artifact is None: @@ -127,6 +135,7 @@ def _set_artifacts(template_payload, prep_template): def _set_artifact(prep_artifacts, artifact, basal_id): + """Set artifact specific information""" artifact_payload = {} artifact_payload[ARTIFACT_ID] = artifact.id @@ -145,6 +154,7 @@ def _set_artifact(prep_artifacts, artifact, basal_id): def _set_artifact_processing(artifact_payload, artifact): + """Set processing parameter information associated with an artifact""" processing_parameters = artifact.processing_parameters if processing_parameters is None: artifact_processing_id = None @@ -162,6 +172,7 @@ def _set_artifact_processing(artifact_payload, artifact): def _set_artifact_filepaths(artifact_payload, artifact): + """Set filepath information associated with an artifact""" artifact_filepaths = [] for filepath_data in artifact.filepaths: local_payload = {} @@ -184,12 +195,30 @@ def get(self, study_id): if study is None: return + # schema: + # STUDY_ID: , + # STUDY_SAMPLE_METADATA_FILEPATH: , + # PREP_TEMPLATES: None | list[dict] + # PREP_ID: , + # PREP_STATUS: , + # PREP_SAMPLE_METADATA_FILEPATH: , + # PREP_DATA_TYPE: , + # PREP_HUMAN_FILTERING: None | , + # PREP_ARTIFACTS: None | list[dict] + # ARTIFACT_ID: , + # ARTIFACT_STATUS: , + # 
ARTIFACT_PARENT_IDS: None | list[int], + # ARTIFACT_BASAL_ID: None | , + # ARTIFACT_PROCESSING_ID: None | , + # ARTIFACT_PROCESSING_NAME: None | , + # ARTIFACT_FILEPATH: , + # ARTIFACT_FILEPATH_TYPE': + # payload = {} _set_study(payload, study) _set_prep_templates(payload, study) self.write(payload) self.finish() - - - # get all the things - From fffb9ec3985c9a310c5d6026910b8119d5f4fbb5 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 08:47:39 -0700 Subject: [PATCH 05/11] LINT: pass w/ ruff check --- qiita_pet/handlers/rest/study_association.py | 5 ----- qiita_pet/test/rest/test_study_associations.py | 7 +++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index 1b67d3a54..30e82dd02 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -5,15 +5,10 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -import warnings -from tornado.escape import json_decode from qiita_db.handlers.oauth2 import authenticate_oauth -from qiita_db.study import StudyPerson, Study -from qiita_db.user import User from .rest_handler import RESTHandler -from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS # terms used more than once diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index 4d2adbc99..6ce06b8ec 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -10,7 +10,6 @@ from tornado.escape import json_decode -from qiita_db.study import Study from qiita_pet.test.rest.test_base import RESTHandlerTestCase @@ -152,11 +151,11 @@ def test_get_valid(self): def _process_dict(d): return [(d, k) for k in d] - def _process_list(l): - if l is None: + def _process_list(list_): + if list_ is None: return [] - return [dk for d in l + return [dk for d in list_ for dk in _process_dict(d)] stack = _process_dict(obs) From a26d65cd7d448c866383606f5e5d66ad35eb1a58 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 08:52:27 -0700 Subject: [PATCH 06/11] LINT: now with flake8 --- qiita_pet/handlers/rest/study_association.py | 4 +- .../test/rest/test_study_associations.py | 270 ++++++++++-------- 2 files changed, 145 insertions(+), 129 deletions(-) diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index 30e82dd02..f0627054e 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -95,7 +95,7 @@ def _set_prep_template(template_payload, prep_template): current_template[PREP_STATUS] = prep_template.status current_template[PREP_SAMPLE_METADATA_FILEPATH] = filepath current_template[PREP_DATA_TYPE] = prep_template.data_type() - current_template[PREP_HUMAN_FILTERING] = 
_get_human_filtering(prep_template) + current_template[PREP_HUMAN_FILTERING] = _get_human_filtering(prep_template) # noqa _set_artifacts(current_template, prep_template) @@ -163,7 +163,7 @@ def _set_artifact_processing(artifact_payload, artifact): artifact_payload[ARTIFACT_PROCESSING_ID] = artifact_processing_id artifact_payload[ARTIFACT_PROCESSING_NAME] = artifact_processing_name - artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments + artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments # noqa def _set_artifact_filepaths(artifact_payload, artifact): diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index 6ce06b8ec..43df423b3 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -18,133 +18,148 @@ def test_get_valid(self): IGNORE = 'IGNORE' exp = {'study_id': 1, 'study_sample_metadata_filepath': IGNORE, - 'prep_templates': [{'prep_id': 1, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': 'The greatest human filtering method', - 'prep_artifacts': [{'artifact_id': 1, - 'artifact_status': 'private', - 'artifact_parent_ids': None, - 'artifact_basal_id': 1, - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [{'artifact_filepath_id': 1, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_forward_seqs'}, - {'artifact_filepath_id': 2, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_barcodes'}]}, - {'artifact_id': 2, - 'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': {'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 
'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': [{'artifact_filepath_id': 3, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'preprocessed_fasta'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 4, - 'artifact_filepath_type': 'preprocessed_fastq'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 5, - 'artifact_filepath_type': 'preprocessed_demux'}]}, - {'artifact_id': 3, - 'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': {'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'True', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': None}, - {'artifact_id': 4, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '1', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 5, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '1', 
- 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 6, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '2', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 12, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}, - {'prep_id': 2, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': None, - 'prep_artifacts': [{'artifact_id': 7, - 'artifact_parent_ids': None, - 'artifact_basal_id': 7, - 'artifact_status': 'private', - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [{'artifact_filepath_id': 22, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}]} + 'prep_templates': [ + {'prep_id': 1, + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', + 'prep_human_filtering': 'The greatest human filtering method', # noqa + 'prep_artifacts': [ + {'artifact_id': 1, + 'artifact_status': 'private', + 'artifact_parent_ids': None, + 'artifact_basal_id': 1, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [ + {'artifact_filepath_id': 1, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_forward_seqs'}, + {'artifact_filepath_id': 2, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 
'raw_barcodes'}]}, + {'artifact_id': 2, + 'artifact_status': 'private', + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': { + 'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': [ + {'artifact_filepath_id': 3, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'preprocessed_fasta'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 4, + 'artifact_filepath_type': 'preprocessed_fastq'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 5, + 'artifact_filepath_type': 'preprocessed_demux'}]}, + {'artifact_id': 3, + 'artifact_status': 'private', + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': { + 'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'True', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': None}, + {'artifact_id': 4, + 'artifact_status': 'private', + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': { + 'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{ + 
'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 5, + 'artifact_status': 'private', + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': { + 'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{ + 'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 6, + 'artifact_status': 'private', + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': { + 'input_data': '2', + 'reference': '2', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{ + 'artifact_filepath_id': 12, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}, + {'prep_id': 2, + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', + 'prep_human_filtering': None, + 'prep_artifacts': [{ + 'artifact_id': 7, + 'artifact_parent_ids': None, + 'artifact_basal_id': 7, + 'artifact_status': 'private', + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{ + 'artifact_filepath_id': 22, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}]} - response = self.get('/api/v1/study/1/associations', headers=self.headers) + response = self.get('/api/v1/study/1/associations', + headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) @@ -173,7 +188,8 @@ def _process_list(list_): self.assertEqual(obs, exp) def 
test_get_invalid(self): - response = self.get('/api/v1/study/0/associations', headers=self.headers) + response = self.get('/api/v1/study/0/associations', + headers=self.headers) self.assertEqual(response.code, 404) From 9c383b103883c17d3a515b8320d922b86c76c68e Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 09:35:18 -0700 Subject: [PATCH 07/11] Rollback: allow an artifact to be unlinked --- qiita_db/artifact.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index ea40e3bef..3ec30c7b3 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1473,11 +1473,6 @@ def prep_templates(self): msg = f"Artifact({self.id}) associated with preps: {sorted(ids)}" raise ValueError(msg) - if len(templates) == 0: - # An artifact must be associated with a template - msg = f"Artifact({self.id}) is not associated with a template" - raise ValueError(msg) - return templates @property From ffed58402b92e5bcdcd4748333a8cbce3394395a Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 09:47:41 -0700 Subject: [PATCH 08/11] DOC: note internal rest endpoint --- qiita_pet/support_files/doc/source/dev/rest.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qiita_pet/support_files/doc/source/dev/rest.rst b/qiita_pet/support_files/doc/source/dev/rest.rst index 8707831d7..fee62f6da 100755 --- a/qiita_pet/support_files/doc/source/dev/rest.rst +++ b/qiita_pet/support_files/doc/source/dev/rest.rst @@ -106,6 +106,8 @@ This is the currently internal but planned to be external (general users) API. +--------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ |GET | ``/api/v1/study//status`` | The status of a study (whether or not the study: is public, has sample information, sample information has warnings and a list of existing preparations. 
| +--------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +|GET | ``/api/v1/study//associations`` | Comprehensive information about a study, associated prep and artifact information, and file locations | ++--------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ |GET | ``/api/v1/person`` | Get list of persons. | +--------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ |GET | ``/api/v1/person?name=foo&affiliation=bar`` | See if a person exists. | From 24449b469d4b40a90efbaefc102aeb4a4d81c799 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 09:56:09 -0700 Subject: [PATCH 09/11] LINT: flake8 --- qiita_db/artifact.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 3ec30c7b3..e5eb581eb 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1463,8 +1463,8 @@ def prep_templates(self): FROM qiita.preparation_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - templates = [qdb.metadata_template.prep_template.PrepTemplate(pt_id) - for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] + templates = [qdb.metadata_template.prep_template.PrepTemplate(pt_id) # noqa + for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] # noqa if len(templates) > 1: # We never expect an artifact to be associated with multiple From c6f1213d0002b0540c3dac85d64bfd4b57ebdd3f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 13:54:03 -0700 
Subject: [PATCH 10/11] Remove extraneous sort --- qiita_pet/handlers/rest/study_association.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index f0627054e..4ef9822be 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -55,11 +55,9 @@ def _most_recent_template_path(template): if len(filepaths) == 0: return None - metadata_paths = sorted(filepaths, reverse=True) - # [0] -> the highest file by ID # [1] -> the filepath - return metadata_paths[0][1] + return filepaths[0][1] def _set_study(payload, study): From ad17f301a77dc042a5da85f3104902a2b6b9626c Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 7 Nov 2025 15:44:18 -0700 Subject: [PATCH 11/11] An incomplete prep will not have an artifact --- qiita_pet/handlers/rest/study_association.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index 4ef9822be..855d33d42 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -102,6 +102,9 @@ def _set_prep_template(template_payload, prep_template): def _get_artifacts(prep_template): """Get artifact information associated with a prep""" + if prep_template.artifact is None: + return [] + pending_artifact_objects = [prep_template.artifact, ] all_artifact_objects = set(pending_artifact_objects[:])