Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ref genomes & other behaviour #194

Merged
merged 19 commits into from Apr 12, 2019
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions .travis.yml
Expand Up @@ -43,7 +43,7 @@ script:
# Run the basic pipeline with the test profile
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference
# Run the basic pipeline with single-end data (pretending it's single-end, actually)
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Run the basic pipeline with paired end data without collapsing
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference
# Run the basic pipeline with paired end data without trimming
Expand All @@ -53,13 +53,13 @@ script:
# Run the basic pipeline with output unmapped reads as fastq
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --strip_input_fastq
# Run the same pipeline testing optional step: fastp, complexity
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test BAM Trimming
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test running with CircularMapper
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2'
# Test running with BWA Mem
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test with zipped reference input
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz'
# Run the basic pipeline with the bam input profile
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Expand Up @@ -15,7 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### `Fixed`
* [#172](https://github.com/nf-core/eager/pull/152) - DamageProfiler errors [won't crash entire pipeline anymore](https://github.com/nf-core/eager/issues/171)
* [#174](https://github.com/nf-core/eager/pull/190) - Publish DeDup files [properly](https://github.com/nf-core/eager/issues/183)
* [#174](https://github.com/nf-core/eager/pull/190) - Publish DeDup files [properly](https://github.com/nf-core/eager/issues/183)
* TBF - Fix reference [issues](https://github.com/nf-core/eager/issues/150)

### `Dependencies`

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Expand Up @@ -29,5 +29,5 @@ dependencies:
- bioconda::bamutil=1.0.14
- bioconda::mtnucratio=0.5
- pysam=0.15.2
- python=3.6
- python=3.6.7
#Missing Schmutzi,snpAD
121 changes: 48 additions & 73 deletions main.nf
Expand Up @@ -144,7 +144,6 @@ params.genome = "Custom"
params.snpcapture = false
params.bedfile = ''
params.fasta = false
params.bwa_index = false
params.seq_dict = false
params.fasta_index = false
params.saveReference = false
Expand Down Expand Up @@ -226,48 +225,50 @@ params.strip_input_fastq = false
params.strip_mode = 'strip'


ch_multiqc_config = Channel.fromPath(params.multiqc_config)
ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
Channel.fromPath("$baseDir/assets/where_are_my_files.txt")
.into{ ch_where_for_bwa_index; ch_where_for_fasta_index; ch_where_for_seqdict}

multiqc_config = file(params.multiqc_config)
output_docs = file("$baseDir/docs/output.md")
where_are_my_files = file("$baseDir/assets/where_are_my_files.txt")
// Validate inputs
if("${params.fasta}".endsWith(".gz")){
//Put the zipped file into a channel, then unzip it and forward it to downstream processes. DON'T unzip in every step — that is inefficient, as NXF links the files from work dir to work dir anyway
Channel.fromPath("${params.fasta}")
.ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
.set {ch_unzip_fasta}
zipped_fasta = file("${params.fasta}")

process unzip_reference{
tag "$zipfasta"

input:
file zipfasta from ch_unzip_fasta
file zipped_fasta

output:
file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper_index)
file "*.fasta" into fasta_for_indexing

script:
"""
pigz -f -d -p ${task.cpus} $zipfasta
"""
}
} else {
Channel.fromPath("${params.fasta}")
.ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
.into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index}
fasta_for_indexing = file("${params.fasta}")
lastPath = params.fasta.lastIndexOf(File.separator)
bwa_base = params.fasta.substring(lastPath+1)
}

//Index files provided? Then check whether they are correct and complete
if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){
exit 1, "Invalid aligner option. Default is bwa, but specify --circularmapper or --bwamem to use these."
}
if( params.bwa_index && (params.aligner == 'bwa' | params.bwamem)){
bwa_index = Channel
.fromPath("${params.bwa_index}",checkIfExists: true)
.ifEmpty { exit 1, "BWA index not found: ${params.bwa_index}"}
.into{ch_bwa_index;ch_bwa_index_bwamem}
}
lastPath = params.bwa_index.lastIndexOf(File.separator)
bwa_dir = params.bwa_index.substring(0,lastPath+1)
bwa_base = params.bwa_index.substring(lastPath+1)

Channel
.fromPath(bwa_dir, checkIfExists: true)
.ifEmpty { exit 1, "BWA index directory not found: ${bwa_dir}" }
.into {bwa_index; bwa_index_bwamem}
}



//Validate that either pairedEnd or singleEnd has been specified by the user!
if( params.singleEnd || params.pairedEnd || params.bam){
Expand All @@ -280,12 +281,6 @@ if (params.skip_collapse && params.singleEnd){
exit 1, "--skip_collapse can only be set for pairedEnd samples!"
}

//AWSBatch sanity checking
if(workflow.profile == 'awsbatch'){
if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!"
}

//Strip mode sanity checking
if (params.strip_input_fastq){
if (!(['strip','replace'].contains(params.strip_mode))) {
Expand All @@ -311,11 +306,6 @@ if( workflow.profile == 'awsbatch') {
// Prevent trace files to be stored on S3 since S3 does not support rolling files.
if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
}

// Stage config files
ch_multiqc_config = Channel.fromPath(params.multiqc_config)
ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")

/*
* Create a channel for input read files
* Dump can be used for debugging purposes, e.g. using the -dump-channels operator on run
Expand Down Expand Up @@ -445,8 +435,7 @@ ${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style
* Create BWA indices if they are not present
*/

if(!params.bwa_index && params.fasta && (params.aligner =='bwa' || params.bwamem)) {

if(!params.bwa_index && params.fasta && (params.aligner == 'bwa' || params.bwamem)){
process makeBWAIndex {
tag {fasta}
publishDir path: "${params.outdir}/reference_genome/bwa_index", mode: 'copy', saveAs: { filename ->
Expand All @@ -455,26 +444,20 @@ process makeBWAIndex {
else null
}

when: !params.bwa_index && params.fasta && (params.aligner == 'bwa' || params.bwamem)

input:
file fasta from ch_fasta_for_bwa_indexing
file wherearemyfiles from ch_where_for_bwa_index
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "bwa_index" into (ch_bwa_index,ch_bwa_index_bwamem)
file "BWAIndex" into (bwa_index, bwa_index_bwamem)
file "where_are_my_files.txt"

script:
base = "${fasta.baseName}"
"""
mkdir bwa_index
mv "${fasta}" "bwa_index/${base}.fasta"
cd bwa_index
bwa index "${base}.fasta"
bwa index $fasta
mkdir BWAIndex && mv ${fasta}* BWAIndex
"""
}

}
}


Expand All @@ -491,20 +474,16 @@ process makeFastaIndex {
when: !params.fasta_index && params.fasta && params.aligner == 'bwa'

input:
file fasta from ch_fasta_for_faidx_indexing
file wherearemyfiles from ch_where_for_fasta_index
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "faidx/${base}.fasta.fai" into ch_fasta_faidx_index
file "faidx/${base}.fasta"
file "${base}.fasta.fai" into ch_fasta_faidx_index
file "where_are_my_files.txt"

script:
base = "${fasta.baseName}"
"""
mkdir faidx
mv $fasta "faidx/${base}.fasta"
cd faidx
samtools faidx "${base}.fasta"
"""
}
Expand All @@ -525,8 +504,8 @@ process makeSeqDict {
when: !params.seq_dict && params.fasta

input:
file fasta from ch_fasta_for_dict_indexing
file wherearemyfiles from ch_where_for_seqdict
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "seq_dict/*.dict" into ch_seq_dict
Expand Down Expand Up @@ -722,8 +701,7 @@ process bwa {

input:
set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)

file index from ch_bwa_index.first()
file index from bwa_index


output:
Expand All @@ -732,13 +710,13 @@ process bwa {


script:
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''
fasta = "${index}/${bwa_base}"

//PE data without merging, PE data without any AR applied
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
prefix = reads[0].toString().tokenize('.')[0]
"""
"""
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam
Expand All @@ -747,7 +725,7 @@ process bwa {
} else {
//PE collapsed, or SE data
prefix = reads[0].toString().tokenize('.')[0]
"""
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
samtools index "${size}" "${prefix}".sorted.bam
Expand All @@ -767,18 +745,15 @@ process circulargenerator{
when: params.circularmapper

input:
file fasta from ch_fasta_for_circularmapper_index
file fasta from fasta_for_indexing

output:
file "cm_index" into ch_circularmapper_indices
file "${prefix}.{amb,ann,bwt,sa,pac}" into ch_circularmapper_indices

script:
prefix = "${fasta.baseName}_${params.circularextension}.fasta"
"""
mkdir cm_index
circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget}
cp "${fasta.baseName}_${params.circularextension}.fasta" cm_index/
cd cm_index
bwa index $prefix
"""

Expand All @@ -793,7 +768,8 @@ process circularmapper{

input:
set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
file index from ch_circularmapper_indices.first()
file index from ch_circularmapper_indices
file fasta from fasta_for_indexing

output:
file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm, ch_circular_mapped_reads_strip
Expand All @@ -802,7 +778,6 @@ process circularmapper{
script:
filter = "${params.circularfilter}" ? '' : '-f true -x false'

fasta = "${index}/*_*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse ){
Expand Down Expand Up @@ -836,16 +811,16 @@ process bwamem {

input:
set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
file index from ch_bwa_index_bwamem.first()
file index from bwa_index_bwamem

output:
file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler, ch_bwamem_mapped_reads_strip
file "*.{bai,csi}"


script:
fasta = "${index}/${bwa_base}"
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse){
Expand Down Expand Up @@ -1061,7 +1036,7 @@ process damageprofiler {

input:
file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm,ch_bwamem_mapped_reads_damageprofiler)
file fasta from ch_fasta_for_damageprofiler.first()
file fasta from fasta_for_indexing
file bai from ch_bam_index_for_damageprofiler


Expand All @@ -1088,7 +1063,7 @@ process qualimap {

input:
file bam from ch_bam_filtered_qualimap
file fasta from ch_fasta_for_qualimap.first()
file fasta from fasta_for_indexing

output:
file "*" into ch_qualimap_results
Expand Down Expand Up @@ -1152,7 +1127,7 @@ process pmdtools {

input:
file bam from ch_for_pmdtools
file fasta from ch_fasta_for_pmdtools
file fasta from fasta_for_indexing

output:
file "*.bam" into ch_bam_after_pmdfiltering
Expand Down Expand Up @@ -1237,7 +1212,7 @@ process output_documentation {
publishDir "${params.outdir}/Documentation", mode: 'copy'

input:
file output_docs from ch_output_docs
file output_docs

output:
file "results_description.html"
Expand Down Expand Up @@ -1291,7 +1266,7 @@ process multiqc {
publishDir "${params.outdir}/MultiQC", mode: 'copy'

input:
file multiqc_config from ch_multiqc_config.collect().ifEmpty([])
file multiqc_config
file ('fastqc_raw/*') from ch_fastqc_results.collect().ifEmpty([])
file('fastqc/*') from ch_fastqc_after_clipping.collect().ifEmpty([])
file ('software_versions/software_versions_mqc*') from software_versions_yaml.collect().ifEmpty([])
Expand All @@ -1307,7 +1282,7 @@ process multiqc {
file workflow_summary from create_workflow_summary(summary)

output:
file "*multiqc_report.html" into ch_multiqc_report
file "*multiqc_report.html" into multiqc_report
file "*_data"

script:
Expand Down