Skip to content

Commit

Permalink
Merge pull request #27 from ewels/master
Browse files Browse the repository at this point in the history
Refactored software versions code.
  • Loading branch information
ewels committed Aug 7, 2017
2 parents bc88e60 + 35c5653 commit fd5559d
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 89 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## v0.3dev
* Fix `--rrbs` mode ([#24](https://github.com/SciLifeLab/NGI-MethylSeq/issues/24))
* Merged test scripts and rewrote them to use command-line flags and a new Travis script.
* Refactored software version collection code to be more resilient and cleaner / easier to maintain.

## [v0.2](https://github.com/SciLifeLab/NGI-MethylSeq/releases/tag/0.2) - 2017-07-17
First (semi-) stable release of the new NGI-MethylSeq pipeline, as we head towards deployment in production.
7 changes: 0 additions & 7 deletions assets/email_template.html
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ <h3>Pipeline Configuration:</h3>
</tbody>
</table>

<h3>Software Versions:</h3>
<table style="width:100%; max-width:100%; border-spacing: 0; border-collapse: collapse; border:0; margin-bottom: 30px;">
<tbody style="border-bottom: 1px solid #ddd;">
<% out << software_versions.collect{ k,v -> "<tr><th style='text-align:left; padding: 8px 0; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'>$k</th><td style='text-align:left; padding: 8px; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>${v ?: '<span style="color:#999999;">N/A</a>'}</pre></td></tr>" }.join("\n") %>
</tbody>
</table>

<p>NGI-MethylSeq is a bioinformatics best-practice analysis pipeline used for Methylation (BS-Seq) data analysis at the National Genomics Infrastructure at SciLifeLab Stockholm, Sweden.</p>
<p>The pipeline uses Nextflow, a bioinformatics workflow tool. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results.</p>
<p>For more information, please see the pipeline homepage: <a href="https://github.com/SciLifeLab/NGI-MethylSeq">https://github.com/SciLifeLab/NGI-MethylSeq</a></p>
Expand Down
5 changes: 0 additions & 5 deletions assets/email_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ Pipeline Configuration:
-----------------------
<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>


Software Versions:
-----------------------
<% out << software_versions.collect{ k,v -> " - $k: $v" }.join("\n") %>

--
NGI-MethylSeq is a bioinformatics best-practice analysis pipeline used for Methylation (BS-Seq) data analysis at the National Genomics Infrastructure at SciLifeLab Stockholm, Sweden.
The pipeline uses Nextflow, a bioinformatics workflow tool. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results.
Expand Down
59 changes: 59 additions & 0 deletions bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python
from __future__ import print_function
from collections import OrderedDict
import re

# Placeholder shown in the report for any tool whose version is unknown.
NOT_AVAILABLE = '<span style="color:#999999;">N/A</span>'

# Report label -> [file holding the tool's version output, regex whose first
# capture group is the bare version string]. An OrderedDict is used so that
# the report lists the tools in exactly the order written here.
regexes = OrderedDict([
    ('NGI-MethylSeq', ['v_ngi_methylseq.txt', r"(\S+)"]),
    ('Nextflow', ['v_nextflow.txt', r"(\S+)"]),
    ('Bismark genomePrep', ['v_bismark_genome_preparation.txt', r"Bismark Genome Preparation Version: v(\S+)"]),
    ('FastQC', ['v_fastqc.txt', r"FastQC v(\S+)"]),
    ('Cutadapt', ['v_cutadapt.txt', r"(\S+)"]),
    ('Trim Galore!', ['v_trim_galore.txt', r"version (\S+)"]),
    ('Bismark', ['v_bismark.txt', r"Bismark Version: v(\S+)"]),
    ('Bismark Deduplication', ['v_deduplicate_bismark.txt', r"Deduplicator Version: v(\S+)"]),
    ('Bismark methXtract', ['v_bismark_methylation_extractor.txt', r"Bismark Extractor Version: v(\S+)"]),
    ('Bismark Report', ['v_bismark2report.txt', r"bismark2report version: v(\S+)"]),
    ('Bismark Summary', ['v_bismark2summary.txt', r"bismark2summary version: (\S+)"]),
    ('Samtools', ['v_samtools.txt', r"samtools (\S+)"]),
    ('Qualimap', ['v_qualimap.txt', r"QualiMap v.(\S+)"]),
    ('MultiQC', ['v_multiqc.txt', r"multiqc, version (\S+)"]),
])


def find_version(path, pattern):
    """Return "v<version>" scraped from the file at *path*, or None.

    *pattern* is searched for anywhere in the file's text; its first capture
    group is taken as the version. None is returned when the file cannot be
    opened or read, or when the pattern does not match.
    """
    try:
        with open(path) as handle:
            text = handle.read()
    except IOError:
        # One unreadable version file should not abort the whole report;
        # the caller substitutes the N/A placeholder instead.
        return None
    match = re.search(pattern, text)
    if match is None:
        return None
    return "v{}".format(match.group(1))


def collect_versions(specs):
    """Return an OrderedDict mapping each label in *specs* to its version.

    *specs* maps a label to a ``[path, pattern]`` pair (see ``regexes``).
    Labels keep the iteration order of *specs*; any tool whose version
    cannot be determined is mapped to ``NOT_AVAILABLE``.
    """
    versions = OrderedDict()
    for label, (path, pattern) in specs.items():
        found = find_version(path, pattern)
        versions[label] = NOT_AVAILABLE if found is None else found
    return versions


def render_yaml(versions):
    """Return the YAML report text for *versions*.

    The HTML definition list is emitted as a YAML literal block scalar, so
    every line of it is indented relative to the ``data:`` key.
    """
    lines = [
        "",
        # NOTE(review): 'ngi-rnaseq' looks like a copy-paste leftover from
        # another pipeline; kept unchanged in case a MultiQC config refers
        # to this id -- confirm before renaming.
        "id: 'ngi-rnaseq'",
        "section_name: 'NGI-MethylSeq Software Versions'",
        "section_href: 'https://github.com/SciLifeLab/NGI-MethylSeq'",
        "plot_type: 'html'",
        "description: 'are collected at run time from the software output.'",
        "data: |",
        '    <dl class="dl-horizontal">',
        "",
    ]
    lines.extend(
        "        <dt>{}</dt><dd>{}</dd>".format(label, version)
        for label, version in versions.items()
    )
    lines.append("    </dl>")
    return "\n".join(lines)


def main():
    """Scrape every version file in the working directory and print the YAML."""
    print(render_yaml(collect_versions(regexes)))


if __name__ == "__main__":
    main()

102 changes: 25 additions & 77 deletions bismark.nf
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ if( params.bismark_index ){
bismark_index = Channel
.fromPath(params.bismark_index)
.ifEmpty { exit 1, "Bismark index not found: ${params.bismark_index}" }
makeBismarkIndex_stderr = Channel.from(false)
makeBismarkIndex_stderr = Channel.empty()
}
else if ( params.fasta ){
fasta = file(params.fasta)
Expand Down Expand Up @@ -192,7 +192,6 @@ if(!params.bismark_index && fasta){

output:
file "BismarkIndex" into bismark_index
file '.command.err' into makeBismarkIndex_stderr

script:
"""
Expand All @@ -217,12 +216,10 @@ process fastqc {

output:
file '*_fastqc.{zip,html}' into fastqc_results
file '.command.out' into fastqc_stdout

script:
"""
fastqc -q $reads
fastqc --version
"""
}

Expand All @@ -232,7 +229,6 @@ process fastqc {
if(params.notrim){
trimmed_reads = read_files_trimming
trimgalore_results = Channel.from(false)
trimgalore_logs = Channel.from(false)
} else {
process trim_galore {
tag "$name"
Expand All @@ -248,7 +244,7 @@ if(params.notrim){

output:
set val(name), file('*fq.gz') into trimmed_reads
file "*trimming_report.txt" into trimgalore_results, trimgalore_logs
file "*trimming_report.txt" into trimgalore_results
file "*_fastqc.{zip,html}" into trimgalore_fastqc_reports

script:
Expand Down Expand Up @@ -288,7 +284,7 @@ process bismark_align {

output:
file "*.bam" into bam, bam_2
file "*report.txt" into bismark_align_log_1, bismark_align_log_2, bismark_align_log_3, bismark_align_log_4
file "*report.txt" into bismark_align_log_1, bismark_align_log_2, bismark_align_log_3
if(params.unmapped){ file "*.fq.gz" into bismark_unmapped }

script:
Expand Down Expand Up @@ -322,7 +318,6 @@ if (params.nodedup || params.rrbs) {
bismark_dedup_log_1 = Channel.from(false)
bismark_dedup_log_2 = Channel.from(false)
bismark_dedup_log_3 = Channel.from(false)
bismark_deduplicate_stdout = Channel.from(false)
} else {
process bismark_deduplicate {
tag "${bam.baseName}"
Expand All @@ -335,18 +330,15 @@ if (params.nodedup || params.rrbs) {
output:
file "${bam.baseName}.deduplicated.bam" into bam_dedup, bam_dedup_qualimap
file "${bam.baseName}.deduplication_report.txt" into bismark_dedup_log_1, bismark_dedup_log_2, bismark_dedup_log_3
file '.command.out' into bismark_deduplicate_stdout

script:
if (params.singleEnd) {
"""
deduplicate_bismark -s --bam $bam
deduplicate_bismark --version
"""
} else {
"""
deduplicate_bismark -p --bam $bam
deduplicate_bismark --version
"""
}
}
Expand All @@ -373,7 +365,6 @@ process bismark_methXtract {
file "${bam.baseName}_splitting_report.txt" into bismark_splitting_report_1, bismark_splitting_report_2, bismark_splitting_report_3
file "${bam.baseName}.M-bias.txt" into bismark_mbias_1, bismark_mbias_2, bismark_mbias_3
file '*.{png,gz}' into bismark_methXtract_results
file '.command.err' into bismark_methXtract_stderr

script:
comprehensive = params.comprehensive ? '--comprehensive --merge_non_CpG' : ''
Expand Down Expand Up @@ -423,7 +414,6 @@ process bismark_report {

output:
file '*{html,txt}' into bismark_reports_results
file '.command.out' into bismark_report_stdout

script:
name = bismark_align_log_1.toString() - ~/(_R1)?(_trimmed|_val_1).+$/
Expand All @@ -433,7 +423,6 @@ process bismark_report {
--dedup_report $bismark_dedup_log_1 \\
--splitting_report $bismark_splitting_report_1 \\
--mbias_report $bismark_mbias_1
bismark2report --version
"""
}

Expand All @@ -452,12 +441,10 @@ process bismark_summary {

output:
file '*{html,txt}' into bismark_summary_results
file '.command.out' into bismark_summary_stdout

script:
"""
bismark2summary
bismark2summary --version
"""
}

Expand All @@ -473,7 +460,6 @@ process qualimap {

output:
file "${bam.baseName}_qualimap" into qualimap_results
file '.command.out' into qualimap_stdout

script:
gcref = params.genome == 'GRCh37' ? '-gd HUMAN' : ''
Expand All @@ -492,70 +478,33 @@ process qualimap {
/*
* Parse software version numbers
*/
software_versions = [
'Bismark genomePrep': null, 'FastQC': null, 'Trim Galore!': null, 'Bismark': null, 'Bismark Deduplication': null,
'Bismark methXtract': null, 'Bismark Report': null, 'Bismark Summary': null, 'Qualimap': null, 'Nextflow': "v$workflow.nextflow.version"
]
process get_software_versions {
cache false
executor 'local'

input:
val makeBismarkIndex from makeBismarkIndex_stderr
val fastqc from fastqc_stdout.collect()
val trimgalore from trimgalore_logs.collect()
val bismark_align from bismark_align_log_4.collect()
val bismark_deduplicate from bismark_deduplicate_stdout.collect()
val bismark_methXtract from bismark_methXtract_stderr.collect()
val bismark_report from bismark_report_stdout.collect()
val bismark_summary from bismark_summary_stdout.collect()
val qualimap from qualimap_stdout.collect()

output:
file 'software_versions_mqc.yaml' into software_versions_yaml

exec:
if(makeBismarkIndex != false){
software_versions['Bismark genomePrep'] = \
makeBismarkIndex.getText().find(/Bisulfite Genome Indexer version v(\S+)/) { match, version -> "v$version"; }
}
software_versions['FastQC'] = \
fastqc[0].getText().find(/FastQC v(\S+)/) { match, version -> "v$version" }
if(!params.notrim){
software_versions['Trim Galore!'] = \
trimgalore[0].getText().find(/Trim Galore version: (\S+)/) { match, version -> "v$version" }
}
software_versions['Bismark'] = \
bismark_align[0].getText().find(/Bismark report for: .* \(version: v(.+)\)/) { match, version -> "v$version" }
if (!params.nodedup && !params.rrbs) {
software_versions['Bismark Deduplication'] = \
bismark_deduplicate[0].getText().find(/Deduplicator Version: v(\S+)/) { match, version -> "v$version" }
}
software_versions['Bismark methXtract'] = \
bismark_methXtract[0].getText().find(/Bismark methylation extractor version v(\S+)/) { match, version -> "v$version" }
software_versions['Bismark Report'] = \
bismark_report[0].getText().find(/bismark2report version: v(\S+)/) { match, version -> "v$version" }
software_versions['Bismark Summary'] = \
bismark_summary[0].getText().find(/bismark2summary version: (\S+)/) { match, version -> "v$version" }
software_versions['Qualimap'] = \
qualimap[0].getText().find(/QualiMap v.(\S+)/) { match, version -> "v$version" }

def sw_yaml_file = task.workDir.resolve('software_versions_mqc.yaml')
sw_yaml_file.text = """
id: 'ngi-rnaseq'
section_name: 'NGI-MethylSeq Software Versions'
section_href: 'https://github.com/SciLifeLab/NGI-MethylSeq'
plot_type: 'html'
description: 'are collected at run time from the software output.'
data: |
<dl class=\"dl-horizontal\">
${software_versions.collect{ k,v -> " <dt>$k</dt><dd>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</dd>" }.join("\n")}
</dl>
""".stripIndent()

script:
"""
echo $version > v_ngi_methylseq.txt
echo $nextflow.version > v_nextflow.txt
bismark_genome_preparation --version > v_bismark_genome_preparation.txt
fastqc --version > v_fastqc.txt
cutadapt --version > v_cutadapt.txt
trim_galore --version > v_trim_galore.txt
bismark --version > v_bismark.txt
deduplicate_bismark --version > v_deduplicate_bismark.txt
bismark_methylation_extractor --version > v_bismark_methylation_extractor.txt
bismark2report --version > v_bismark2report.txt
bismark2summary --version > v_bismark2summary.txt
samtools --version > v_samtools.txt
qualimap --version > v_qualimap.txt
multiqc --version > v_multiqc.txt
scrape_software_versions.py > software_versions_mqc.yaml
"""
}



/*
* STEP 9 - MultiQC
*/
Expand Down Expand Up @@ -589,7 +538,6 @@ process multiqc {
multiqc -f $rtitle $rfilename --config $multiqc_config .
"""
}
multiqc_stderr.subscribe { stdout -> software_versions['MultiQC'] = stdout.getText().find(/This is MultiQC v(\S+)/) { match, version -> "v$version" } }

/*
* Completion e-mail notification
Expand Down Expand Up @@ -621,9 +569,9 @@ workflow.onComplete {
if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
if(workflow.container) email_fields['summary']['Docker image'] = workflow.container
email_fields['software_versions'] = software_versions
email_fields['software_versions']['Nextflow Build'] = workflow.nextflow.build
email_fields['software_versions']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
email_fields['summary']['Nextflow Version'] = nextflow.version
email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

// Render the TXT template
def engine = new groovy.text.GStringTemplateEngine()
Expand Down

0 comments on commit fd5559d

Please sign in to comment.