Skip to content

Commit

Permalink
ENH: Add read length seven-number summary (#84)
Browse files Browse the repository at this point in the history
  • Loading branch information
jakereps authored and thermokarst committed Jun 20, 2018
1 parent 28c2081 commit ac3346b
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
22 changes: 22 additions & 0 deletions q2_demux/_summarize/_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ def _compute_stats_of_df(df):
return df_stats


def _build_seq_len_table(qscores: pd.DataFrame) -> str:
    """Render a summary table of sequence lengths as an HTML string.

    The length of each row of ``qscores`` is taken to be its number of
    non-null cells (presumably one row per sampled read and one column per
    base position -- confirm against the caller). Those lengths are
    summarized with ``_compute_stats_of_df``; every statistic except
    ``count`` is truncated to an integer and suffixed with ``' nts'``.

    Parameters
    ----------
    qscores : pd.DataFrame
        Quality-score table whose null cells mark positions beyond the end
        of a row's sequence.

    Returns
    -------
    str
        The single-column summary table as produced by
        ``q2templates.df_to_html``.
    """
    sequence_lengths = qscores.notnull().sum(axis=1)
    stats = _compute_stats_of_df(sequence_lengths)

    # The formatted values below are strings, so switch to object dtype
    # explicitly first. Assigning strings into a numeric Series relies on
    # silent upcasting, which newer pandas releases deprecate.
    stats = stats.astype(object)
    is_length_stat = stats.index != 'count'
    stats[is_length_stat] = \
        stats[is_length_stat].astype(int).apply('{} nts'.format)

    stats = stats.rename(index={'50%': '50% (Median)',
                                'count': 'Total Sequences Sampled'})
    # An empty column name keeps the rendered table header blank.
    frame = stats.to_frame(name="")
    return q2templates.df_to_html(frame)


def summarize(output_dir: str, data: _PlotQualView, n: int=10000) -> None:
paired = data.paired
data = data.directory_format
Expand Down Expand Up @@ -152,19 +166,24 @@ def summarize(output_dir: str, data: _PlotQualView, n: int=10000) -> None:
forward_stats.to_csv(os.path.join(output_dir,
'forward-seven-number-summaries.csv'),
header=True, index=True)
forward_length_table = _build_seq_len_table(forward_scores)

if (forward_stats.loc['50%'] > 45).any():
dangers.append('Some of the PHRED quality values are out of range. '
'This is likely because an incorrect PHRED offset '
'was chosen on import of your raw data. You can learn '
'how to choose your PHRED offset during import in the '
'importing tutorial.')

# Required initialization for conditional display of the table
reverse_length_table = None
if paired:
reverse_scores = pd.DataFrame(quality_scores['reverse'])
reverse_stats = _compute_stats_of_df(reverse_scores)
reverse_stats.to_csv(os.path.join(output_dir,
'reverse-seven-number-summaries.csv'),
header=True, index=True)
reverse_length_table = _build_seq_len_table(reverse_scores)

show_plot = len(fwd) > 1
if show_plot:
Expand All @@ -187,7 +206,10 @@ def summarize(output_dir: str, data: _PlotQualView, n: int=10000) -> None:
'max': result.max(),
'sum': sequence_count
},
'forward_length_table': forward_length_table,
'reverse_length_table': reverse_length_table,
'result': html,
'n_samples': result.count(),
'show_plot': show_plot,
'paired': paired,
'tabs': [{'title': 'Overview',
Expand Down
1 change: 1 addition & 0 deletions q2_demux/_summarize/assets/overview.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ <h1>Demultiplexed sequence counts summary</h1>
<div class="row">
<div class="col-lg-12">
<h1>Per-sample sequence counts</h1>
<h4>Total Samples: {{ n_samples }}</h4>
{{ result }}
<a href="per-sample-fastq-counts.csv">Download as CSV</a>
</div>
Expand Down
18 changes: 18 additions & 0 deletions q2_demux/_summarize/assets/quality-plot.html
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ <h5 class="text-center">Reverse Reads</h5>
<div id="forwardContainer" class="col-xs-12 col-md-6 col-md-offset-3"></div>
{% endif %}
</div>

<div class="row">
<div class="col-lg-6 col-lg-offset-3">
<h1>Demultiplexed sequence length summary</h1>
{% set colsize = 6 if reverse_length_table else 12 %}
<div class="col-lg-{{ colsize }}">
<h4>Forward Reads</h4>
{{ forward_length_table }}
</div>
{% if reverse_length_table %}
<div class="col-lg-{{ colsize }}">
<h4>Reverse Reads</h4>
{{ reverse_length_table }}
</div>
{% endif %}
</div>
</div>

<script src="./dist/bundle.js" charset="utf-8"></script>
<script src="./data.jsonp" charset="utf-8"></script>
{% endblock %}

0 comments on commit ac3346b

Please sign in to comment.