1414from skbio .util import safe_md5
1515
1616from qiita_core .qiita_settings import qiita_config
17-
17+ from qiita_ware . exceptions import EBISumbissionError
1818from qiita_db .logger import LogEntry
1919from qiita_db .ontology import Ontology
2020from qiita_db .util import convert_to_id
@@ -280,18 +280,20 @@ def generate_study_xml(self):
280280
281281 return study_set
282282
283- def add_sample (self , sample_name , taxon_id = None , description = None ,
284- ** kwargs ):
283+ def add_sample (self , sample_name , taxon_id , scientific_name ,
284+ description , ** kwargs ):
285285 """Adds sample information to the current submission
286286
287287 Parameters
288288 ----------
289289 sample_name : str
290290 Unique identifier for the sample
291- taxon_id : str, optional
292- Defaults to ``None``. If not provided, the `empty_value` will be
293- used for the taxon ID
294- description : str, optional
291+ taxon_id : str
292+ NCBI's taxon ID for the sample
293+ scientific_name : str
294+ NCBI's scientific name for the `taxon_id`
295+ description : str
296+
295297 Defaults to ``None``. If not provided, the `empty_value` will be
296298 used for the description
297299
@@ -308,15 +310,14 @@ def add_sample(self, sample_name, taxon_id=None, description=None,
308310
309311 self .samples [sample_name ] = {}
310312
311- self .samples [sample_name ]['taxon_id' ] = self .empty_value if \
312- taxon_id is None else taxon_id
313- self .samples [sample_name ]['taxon_id' ] = \
314- escape (clean_whitespace (self .samples [sample_name ]['taxon_id' ]))
313+ self .samples [sample_name ]['taxon_id' ] = escape (
314+ clean_whitespace (taxon_id ))
315315
316- self .samples [sample_name ]['description' ] = self .empty_value if \
317- description is None else description
318- self .samples [sample_name ]['description' ] = \
319- escape (clean_whitespace (self .samples [sample_name ]['description' ]))
316+ self .samples [sample_name ]['scientific_name' ] = escape (
317+ clean_whitespace (scientific_name ))
318+
319+ self .samples [sample_name ]['description' ] = escape (
320+ clean_whitespace (description ))
320321
321322 self .samples [sample_name ]['attributes' ] = self ._stringify_kwargs (
322323 kwargs )
@@ -349,6 +350,10 @@ def generate_sample_xml(self):
349350 taxon_id = ET .SubElement (sample_name_element , 'TAXON_ID' )
350351 taxon_id .text = escape (clean_whitespace (sample_info ['taxon_id' ]))
351352
353+ taxon_id = ET .SubElement (sample_name_element , 'SCIENTIFIC_NAME' )
354+ taxon_id .text = escape (
355+ clean_whitespace (sample_info ['scientific_name' ]))
356+
352357 description = ET .SubElement (sample , 'DESCRIPTION' )
353358 description .text = escape (clean_whitespace (
354359 sample_info ['description' ]))
@@ -791,21 +796,36 @@ def add_samples_from_templates(self, sample_template, prep_template,
791796 Path to the directory containing per-sample FASTQ files where
792797 the sequence labels should be:
793798 ``SampleID_SequenceNumber And Additional Notes if Applicable``
799+
800+ Raises
801+ ------
802+ EBISumbissionError
803+ If a sample doesn't have the required EBI submission information
794804 """
795805 if not exists (per_sample_fastq_dir ):
796806 raise IOError ('The directory with the FASTQ file does not exist.' )
797807
798808 for sample in iter_file_via_list_of_dicts (sample_template ):
799809 sample_name = sample .pop ('sample_name' )
800810 taxon_id = sample .pop ('taxon_id' , None )
811+ scientific_name = sample .pop ('scientific_name' , None )
801812 description = sample .pop ('description' , None )
802813
803- self .add_sample (sample_name , taxon_id = taxon_id ,
804- description = description ,
805- ** sample )
814+ if taxon_id is None or scientific_name is None or \
815+ description is None :
816+ raise EBISumbissionError (
817+ "Sample '%s' is missing required EBI submission "
818+ "information. taxon_id: %s; scientific_name: %s; "
819+ "description: %s" % (sample_name , taxon_id ,
820+ scientific_name , description ))
806821
822+ self .add_sample (sample_name , taxon_id , scientific_name ,
823+ description , ** sample )
824+
825+ prep_template_samples = []
807826 for prep in iter_file_via_list_of_dicts (prep_template ):
808827 sample_name = prep .pop ('sample_name' )
828+ prep_template_samples .append (sample_name )
809829 platform = prep .pop ('platform' )
810830 experiment_design_description = prep .pop (
811831 'experiment_design_description' )
@@ -818,6 +838,10 @@ def add_samples_from_templates(self, sample_template, prep_template,
818838 library_construction_protocol ,
819839 ** prep )
820840
841+ to_remove = set (self .samples ).difference (prep_template_samples )
842+ for sample in to_remove :
843+ del self .samples [sample ]
844+
821845 @classmethod
822846 def from_templates_and_per_sample_fastqs (cls , preprocessed_data_id ,
823847 study_title ,
0 commit comments