Skip to content

Commit

Permalink
BUG: Support sample IDs with underscores in summarize (#50)
Browse files Browse the repository at this point in the history
Fixes #49
  • Loading branch information
maxvonhippel authored and ebolyen committed Jun 29, 2017
1 parent 4745119 commit 6871b42
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
20 changes: 12 additions & 8 deletions q2_demux/_summarize/_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,14 @@ def __init__(self, directory_format, paired):
self.paired = paired


def _link_sample_n_to_file(files, counts, subsample_ns):
def _link_sample_n_to_file(file_records, counts, subsample_ns):
results = collections.defaultdict(list)
for num in subsample_ns:
total = 0
for file in files:
sample_name = os.path.basename(file).split('_', 1)[0]
total += counts[sample_name]
for file, sample_id in file_records:
total += counts[sample_id]
if num < total:
idx = counts[sample_name] - (total - num)
idx = counts[sample_id] - (total - num)
results[file].append(idx)
break
return results
Expand Down Expand Up @@ -109,12 +108,15 @@ def summarize(output_dir: str, data: _PlotQualView, n: int=10000) -> None:

per_sample_fastq_counts = {}
reads = rev if not fwd and rev else fwd
file_records = []
for file in reads:
count = 0
for seq in _read_fastq_seqs(file):
count += 1
sample_name = os.path.basename(file).split('_', 1)[0]
per_sample_fastq_counts[sample_name] = count
sample_id = manifest.loc[manifest.filename == file,
'sample-id'].iloc[0]
per_sample_fastq_counts[sample_id] = count
file_records.append((file, sample_id))

result = pd.Series(per_sample_fastq_counts)
result.name = 'Sequence count'
Expand All @@ -131,7 +133,9 @@ def summarize(output_dir: str, data: _PlotQualView, n: int=10000) -> None:
'was generated using all available sequences.')

subsample_ns = sorted(random.sample(range(sequence_count), n))
link = _link_sample_n_to_file(reads, per_sample_fastq_counts, subsample_ns)
link = _link_sample_n_to_file(file_records,
per_sample_fastq_counts,
subsample_ns)
if paired:
sample_map = [(file, rev[fwd.index(file)], link[file])
for file in link]
Expand Down
6 changes: 5 additions & 1 deletion q2_demux/tests/test_demux.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,8 @@ def setUp(self):
def test_basic(self):
bsi = BarcodeSequenceFastqIterator(self.barcodes, self.sequences)

barcode_map = pd.Series(['AAAA', 'AACC'], index=['sample1', 'sample2'])
barcode_map = pd.Series(['AAAA', 'AACC'],
index=['sample_1', 'sample2'])
barcode_map = qiime2.MetadataCategory(barcode_map)

demux_data = emp_single(bsi, barcode_map)
Expand All @@ -677,6 +678,9 @@ def test_basic(self):
html = fh.read()
self.assertIn('<td>Minimum:</td><td>1</td>', html)
self.assertIn('<td>Maximum:</td><td>3</td>', html)
with open(csv_fp, 'r') as ch:
csv = ch.read()
self.assertIn('sample_1', csv)

def test_single_sample(self):
bsi = BarcodeSequenceFastqIterator(self.barcodes[:1],
Expand Down

0 comments on commit 6871b42

Please sign in to comment.