@@ -122,8 +122,8 @@ def prefix_sample_names_with_id(md_template, study_id):
122122 md_template .index .name = None
123123
124124
125- def load_template_to_dataframe (fn , strip_whitespace = True ):
126- """Load a sample or a prep template into a data frame
125+ def load_template_to_dataframe (fn , strip_whitespace = True , index = 'sample_name' ):
126+ """Load a sample/prep template or a QIIME mapping file into a data frame
127127
128128 Parameters
129129 ----------
@@ -132,6 +132,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
132132 strip_whitespace : bool, optional
133133 Defaults to True. Whether or not to strip whitespace from values in the
134134 input file
135+ index : str, optional
136+ Defaults to 'sample_name'. The index to use in the loaded information
135137
136138 Returns
137139 -------
@@ -167,6 +169,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
167169 +=======================+==============+
168170 | sample_name | str |
169171 +-----------------------+--------------+
172+ | #SampleID | str |
173+ +-----------------------+--------------+
170174 | physical_location | str |
171175 +-----------------------+--------------+
172176 | has_physical_specimen | bool |
@@ -203,6 +207,17 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
203207 controlled_cols .update (CONTROLLED_COLS )
204208 holdfile [0 ] = '\t ' .join (c .lower () if c .lower () in controlled_cols else c
205209 for c in cols )
210+
211+ if index == "#SampleID" :
212+ # We're going to parse a QIIME mapping file. We are going to first
213+ # parse it with the QIIME function so we can remove the comments
214+ # easily and make sure that QIIME will accept this as a mapping file
215+ data , headers , comments = _parse_mapping_file (holdfile )
216+ holdfile = ["%s\n " % '\t ' .join (d ) for d in data ]
217+ holdfile .insert (0 , "%s\n " % '\t ' .join (headers ))
218+ # The QIIME parser fixes the index and removes the #
219+ index = 'SampleID'
220+
206221 # index_col:
207222 # is set as False, otherwise it is cast as a float and we want a string
208223 # keep_default:
@@ -224,7 +239,7 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
224239 keep_default_na = False , na_values = ['' ],
225240 parse_dates = True , index_col = False , comment = '\t ' ,
226241 mangle_dupe_cols = False , converters = {
227- 'sample_name' : lambda x : str (x ).strip (),
242+ index : lambda x : str (x ).strip (),
228243 # required sample template information
229244 'physical_location' : str ,
230245 'sample_type' : str ,
@@ -263,21 +278,22 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
263278
264279 initial_columns = set (template .columns )
265280
266- if 'sample_name' not in template .columns :
267- raise QiitaDBColumnError ("The 'sample_name' column is missing from "
268- "your template, this file cannot be parsed." )
281+ if index not in template .columns :
282+ raise QiitaDBColumnError ("The '%s' column is missing from "
283+ "your template, this file cannot be parsed."
284+ % index )
269285
270286 # remove rows that have no sample identifier but that may have other data
271287 # in the rest of the columns
272- template .dropna (subset = ['sample_name' ], how = 'all' , inplace = True )
288+ template .dropna (subset = [index ], how = 'all' , inplace = True )
273289
274290 # set the sample name as the index
275- template .set_index ('sample_name' , inplace = True )
291+ template .set_index (index , inplace = True )
276292
277293 # it is not uncommon to find templates that have empty columns
278294 template .dropna (how = 'all' , axis = 1 , inplace = True )
279295
280- initial_columns .remove ('sample_name' )
296+ initial_columns .remove (index )
281297 dropped_cols = initial_columns - set (template .columns )
282298 if dropped_cols :
283299 warnings .warn ('The following column(s) were removed from the template '
@@ -315,3 +331,119 @@ def get_invalid_sample_names(sample_names):
315331 inv .append (s )
316332
317333 return inv
334+
335+
def looks_like_qiime_mapping_file(fp):
    """Checks if the file looks like a QIIME mapping file

    Parameters
    ----------
    fp : str or file-like object
        filepath to check if it looks like a QIIME mapping file

    Returns
    -------
    bool
        True if fp looks like a QIIME mapping file, false otherwise.

    Notes
    -----
    This is not doing a validation of the QIIME mapping file. It simply checks
    the first line in the file and it returns true if the first
    whitespace-delimited token of that line is '#SampleID', since a
    sample/prep template will start with 'sample_name' or some other different
    column.

    An empty file, or a file whose first line contains only whitespace, does
    not look like a QIIME mapping file and yields False.
    """
    with open_file(fp, mode='U') as f:
        first_line = f.readline()

    # str.split() with no separator returns an empty list for both an empty
    # string and a whitespace-only string, so guard before indexing: a file
    # that starts with a blank line must yield False, not an IndexError.
    tokens = first_line.split()
    if not tokens:
        return False

    return tokens[0] == '#SampleID'
365+
366+
367+ def _parse_mapping_file (lines , strip_quotes = True , suppress_stripping = False ):
368+ """Parser for map file that relates samples to metadata.
369+
370+ Format: header line with fields
371+ optionally other comment lines starting with #
372+ tab-delimited fields
373+
374+ Parameters
375+ ----------
376+ lines : iterable of str
377+ The contents of the QIIME mapping file
378+ strip_quotes : bool, optional
379+ Defaults to true. If true, quotes are removed from the data
380+ suppress_stripping : bool, optional
381+ Defaults to false. If true, spaces are not stripped
382+
383+ Returns
384+ -------
385+ list of lists, list of str, list of str
386+ The data in the mapping file, the headers and the comments
387+
388+ Raises
389+ ------
390+ QiitaDBError
391+ If there is any error parsing the mapping file
392+
393+ Notes
394+ -----
395+ This code has been ported from QIIME.
396+ """
397+ if strip_quotes :
398+ if suppress_stripping :
399+ # remove quotes but not spaces
400+
401+ def strip_f (x ):
402+ return x .replace ('"' , '' )
403+ else :
404+ # remove quotes and spaces
405+
406+ def strip_f (x ):
407+ return x .replace ('"' , '' ).strip ()
408+ else :
409+ if suppress_stripping :
410+ # don't remove quotes or spaces
411+
412+ def strip_f (x ):
413+ return x
414+ else :
415+ # remove spaces but not quotes
416+
417+ def strip_f (x ):
418+ return x .strip ()
419+
420+ # Create lists to store the results
421+ mapping_data = []
422+ header = []
423+ comments = []
424+
425+ # Begin iterating over lines
426+ for line in lines :
427+ line = strip_f (line )
428+ if not line or (suppress_stripping and not line .strip ()):
429+ # skip blank lines when not stripping lines
430+ continue
431+
432+ if line .startswith ('#' ):
433+ line = line [1 :]
434+ if not header :
435+ header = line .strip ().split ('\t ' )
436+ else :
437+ comments .append (line )
438+ else :
439+ # Will add empty string to empty fields
440+ tmp_line = map (strip_f , line .split ('\t ' ))
441+ if len (tmp_line ) < len (header ):
442+ tmp_line .extend (['' ] * (len (header ) - len (tmp_line )))
443+ mapping_data .append (tmp_line )
444+ if not header :
445+ raise QiitaDBError ("No header line was found in mapping file." )
446+ if not mapping_data :
447+ raise QiitaDBError ("No data found in mapping file." )
448+
449+ return mapping_data , header , comments
0 commit comments