Skip to content

Commit ca084a6

Browse files
committed
Merge pull request #1199 from josenavas/issue-1194
Fixes 1194
2 parents 6819e8a + 03f0731 commit ca084a6

File tree

7 files changed

+360
-232
lines changed

7 files changed

+360
-232
lines changed

qiita_db/metadata_template/constants.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
SAMPLE_TEMPLATE_COLUMNS = {
1616
# The following columns are required by EBI for submission
1717
'EBI': Restriction(columns={'collection_timestamp': 'timestamp',
18-
'physical_specimen_location': 'varchar'},
18+
'physical_specimen_location': 'varchar',
19+
'taxon_id': 'integer',
20+
'scientific_name': 'varchar'},
1921
error_msg="EBI submission disabled"),
2022
# The following columns are required for the official main QIITA site
2123
'qiita_main': Restriction(columns={'sample_type': 'varchar',

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 216 additions & 177 deletions
Large diffs are not rendered by default.

qiita_db/test/test_analysis.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,9 @@ def test_retrieve_dropped_samples(self):
209209
'Description': 'Test Sample 1',
210210
'str_column': 'Value for sample 1',
211211
'latitude': 42.42,
212-
'longitude': 41.41},
212+
'longitude': 41.41,
213+
'taxon_id': 9606,
214+
'scientific_name': 'homo sapiens'},
213215
'SKD8.640184': {'physical_specimen_location': 'location1',
214216
'physical_specimen_remaining': True,
215217
'dna_extracted': True,
@@ -221,7 +223,9 @@ def test_retrieve_dropped_samples(self):
221223
'Description': 'Test Sample 2',
222224
'str_column': 'Value for sample 2',
223225
'latitude': 4.2,
224-
'longitude': 1.1},
226+
'longitude': 1.1,
227+
'taxon_id': 9606,
228+
'scientific_name': 'homo sapiens'},
225229
'SKB7.640196': {'physical_specimen_location': 'location1',
226230
'physical_specimen_remaining': True,
227231
'dna_extracted': True,
@@ -233,7 +237,9 @@ def test_retrieve_dropped_samples(self):
233237
'Description': 'Test Sample 3',
234238
'str_column': 'Value for sample 3',
235239
'latitude': 4.8,
236-
'longitude': 4.41},
240+
'longitude': 4.41,
241+
'taxon_id': 9606,
242+
'scientific_name': 'homo sapiens'},
237243
}
238244
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
239245

qiita_db/test/test_commands.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -725,19 +725,19 @@ def test_update_preprocessed_data_from_cmd_ppd(self):
725725
"sample_name\trequired_sample_info_status\tcollection_timestamp\t"
726726
"sample_type\tphysical_specimen_remaining\tphysical_specimen_location\t"
727727
"dna_extracted\thost_subject_id\tTreatment\tDOB\tlatitude\tlongitude"
728-
"\tDescription\n"
728+
"\ttaxon_id\tscientific_name\tDescription\n"
729729
"PC.354\treceived\t2014-06-18 16:44\ttype_1\tTrue\tLocation_1\tTrue\t"
730730
"HS_ID_PC.354\tControl\t20061218\t1.88401499993\t56.0003871552\t"
731-
"Control_mouse_I.D._354\n"
731+
"9606\thomo sapiens\tControl_mouse_I.D._354\n"
732732
"PC.593\treceived\t2014-06-18 16:44\ttype_1\tTrue\tLocation_1\tTrue\t"
733733
"HS_ID_PC.593\tControl\t20071210\t35.4079458313\t83.2595338611\t"
734-
"Control_mouse_I.D._593\n"
734+
"9606\thomo sapiens\tControl_mouse_I.D._593\n"
735735
"PC.607\treceived\t2014-06-18 16:44\ttype_1\tTrue\tLocation_1\tTrue\t"
736736
"HS_ID_PC.607\tFast\t20071112\t18.3175615444\t91.3713989729\t"
737-
"Fasting_mouse_I.D._607\n"
737+
"9606\thomo sapiens\tFasting_mouse_I.D._607\n"
738738
"PC.636\treceived\t2014-06-18 16:44\ttype_1\tTrue\tLocation_1\tTrue\t"
739739
"HS_ID_PC.636\tFast\t20080116\t31.0856060708\t4.16781143893\t"
740-
"Fasting_mouse_I.D._636")
740+
"9606\thomo sapiens\tFasting_mouse_I.D._636")
741741

742742
PREP_TEMPLATE = (
743743
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'

qiita_ware/ebi.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from skbio.util import safe_md5
1515

1616
from qiita_core.qiita_settings import qiita_config
17-
17+
from qiita_ware.exceptions import EBISumbissionError
1818
from qiita_db.logger import LogEntry
1919
from qiita_db.ontology import Ontology
2020
from qiita_db.util import convert_to_id
@@ -280,18 +280,20 @@ def generate_study_xml(self):
280280

281281
return study_set
282282

283-
def add_sample(self, sample_name, taxon_id=None, description=None,
284-
**kwargs):
283+
def add_sample(self, sample_name, taxon_id, scientific_name,
284+
description, **kwargs):
285285
"""Adds sample information to the current submission
286286
287287
Parameters
288288
----------
289289
sample_name : str
290290
Unique identifier for the sample
291-
taxon_id : str, optional
292-
Defaults to ``None``. If not provided, the `empty_value` will be
293-
used for the taxon ID
294-
description : str, optional
291+
taxon_id : str
292+
NCBI's taxon ID for the sample
293+
scientific_name : str
294+
NCBI's scientific name for the `taxon_id`
295+
description : str
296+
295297
Defaults to ``None``. If not provided, the `empty_value` will be
296298
used for the description
297299
@@ -308,15 +310,14 @@ def add_sample(self, sample_name, taxon_id=None, description=None,
308310

309311
self.samples[sample_name] = {}
310312

311-
self.samples[sample_name]['taxon_id'] = self.empty_value if \
312-
taxon_id is None else taxon_id
313-
self.samples[sample_name]['taxon_id'] = \
314-
escape(clean_whitespace(self.samples[sample_name]['taxon_id']))
313+
self.samples[sample_name]['taxon_id'] = escape(
314+
clean_whitespace(taxon_id))
315315

316-
self.samples[sample_name]['description'] = self.empty_value if \
317-
description is None else description
318-
self.samples[sample_name]['description'] = \
319-
escape(clean_whitespace(self.samples[sample_name]['description']))
316+
self.samples[sample_name]['scientific_name'] = escape(
317+
clean_whitespace(scientific_name))
318+
319+
self.samples[sample_name]['description'] = escape(
320+
clean_whitespace(description))
320321

321322
self.samples[sample_name]['attributes'] = self._stringify_kwargs(
322323
kwargs)
@@ -349,6 +350,10 @@ def generate_sample_xml(self):
349350
taxon_id = ET.SubElement(sample_name_element, 'TAXON_ID')
350351
taxon_id.text = escape(clean_whitespace(sample_info['taxon_id']))
351352

353+
taxon_id = ET.SubElement(sample_name_element, 'SCIENTIFIC_NAME')
354+
taxon_id.text = escape(
355+
clean_whitespace(sample_info['scientific_name']))
356+
352357
description = ET.SubElement(sample, 'DESCRIPTION')
353358
description.text = escape(clean_whitespace(
354359
sample_info['description']))
@@ -791,21 +796,36 @@ def add_samples_from_templates(self, sample_template, prep_template,
791796
Path to the directory containing per-sample FASTQ files where
792797
the sequence labels should be:
793798
``SampleID_SequenceNumber And Additional Notes if Applicable``
799+
800+
Raises
801+
------
802+
EBISumbissionError
803+
If a sample doesn't have the required EBI submission information
794804
"""
795805
if not exists(per_sample_fastq_dir):
796806
raise IOError('The directory with the FASTQ file does not exist.')
797807

798808
for sample in iter_file_via_list_of_dicts(sample_template):
799809
sample_name = sample.pop('sample_name')
800810
taxon_id = sample.pop('taxon_id', None)
811+
scientific_name = sample.pop('scientific_name', None)
801812
description = sample.pop('description', None)
802813

803-
self.add_sample(sample_name, taxon_id=taxon_id,
804-
description=description,
805-
**sample)
814+
if taxon_id is None or scientific_name is None or \
815+
description is None:
816+
raise EBISumbissionError(
817+
"Sample '%s' is missing required EBI submission "
818+
"information. taxon_id: %s; scientific_name: %s; "
819+
"description: %s" % (sample_name, taxon_id,
820+
scientific_name, description))
806821

822+
self.add_sample(sample_name, taxon_id, scientific_name,
823+
description, **sample)
824+
825+
prep_template_samples = []
807826
for prep in iter_file_via_list_of_dicts(prep_template):
808827
sample_name = prep.pop('sample_name')
828+
prep_template_samples.append(sample_name)
809829
platform = prep.pop('platform')
810830
experiment_design_description = prep.pop(
811831
'experiment_design_description')
@@ -818,6 +838,10 @@ def add_samples_from_templates(self, sample_template, prep_template,
818838
library_construction_protocol,
819839
**prep)
820840

841+
to_remove = set(self.samples).difference(prep_template_samples)
842+
for sample in to_remove:
843+
del self.samples[sample]
844+
821845
@classmethod
822846
def from_templates_and_per_sample_fastqs(cls, preprocessed_data_id,
823847
study_title,

qiita_ware/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,8 @@ class StudyDoesNotExistsError(QiitaWareError):
4040
class ComputeError(QiitaWareError):
4141
"""A compute error happened"""
4242
pass
43+
44+
45+
class EBISumbissionError(QiitaWareError):
46+
"""Error raised when a submission to EBI cannot be carried out"""
47+
pass

0 commit comments

Comments
 (0)