Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ref genomes & other behaviour #194

Merged
merged 19 commits into from Apr 12, 2019
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions .travis.yml
Expand Up @@ -43,7 +43,7 @@ script:
# Run the basic pipeline with the test profile
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --saveReference
# Run the basic pipeline with single-end data (pretending it's single-end, actually)
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Run the basic pipeline with paired end data without collapsing
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --skip_collapse --saveReference
# Run the basic pipeline with paired end data without trimming
Expand All @@ -53,13 +53,13 @@ script:
# Run the basic pipeline with output unmapped reads as fastq
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --strip_input_fastq
# Run the same pipeline testing optional step: fastp, complexity
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test BAM Trimming
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test running with CircularMapper
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2'
# Test running with BWA Mem
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/bwa_index/
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta
# Test with zipped reference input
- nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz'
# Run the basic pipeline with the bam input profile
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Expand Up @@ -15,7 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### `Fixed`
* [#172](https://github.com/nf-core/eager/pull/152) - DamageProfiler errors [won't crash entire pipeline anymore](https://github.com/nf-core/eager/issues/171)
* [#174](https://github.com/nf-core/eager/pull/190) - Publish DeDup files [properly](https://github.com/nf-core/eager/issues/183)
* [#174](https://github.com/nf-core/eager/pull/190) - Publish DeDup files [properly](https://github.com/nf-core/eager/issues/183)
* TBF - Fix reference [issues](https://github.com/nf-core/eager/issues/150)

### `Dependencies`

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Expand Up @@ -29,5 +29,5 @@ dependencies:
- bioconda::bamutil=1.0.14
- bioconda::mtnucratio=0.5
- pysam=0.15.2
- python=3.6
- python=3.6.7
#Missing Schmutzi,snpAD
121 changes: 48 additions & 73 deletions main.nf
Expand Up @@ -144,7 +144,6 @@ params.genome = "Custom"
params.snpcapture = false
params.bedfile = ''
params.fasta = false
params.bwa_index = false
params.seq_dict = false
params.fasta_index = false
params.saveReference = false
Expand Down Expand Up @@ -226,48 +225,50 @@ params.strip_input_fastq = false
params.strip_mode = 'strip'


ch_multiqc_config = Channel.fromPath(params.multiqc_config)
ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
Channel.fromPath("$baseDir/assets/where_are_my_files.txt")
.into{ ch_where_for_bwa_index; ch_where_for_fasta_index; ch_where_for_seqdict}

multiqc_config = file(params.multiqc_config)
output_docs = file("$baseDir/docs/output.md")
where_are_my_files = file("$baseDir/assets/where_are_my_files.txt")
// Validate inputs
if("${params.fasta}".endsWith(".gz")){
//Put the zipped file into a channel, then unzip it and forward it to downstream processes. DON'T unzip in every step — that is inefficient, as NXF links the files from work dir to work dir anyway
Channel.fromPath("${params.fasta}")
.ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
.set {ch_unzip_fasta}
zipped_fasta = file("${params.fasta}")

process unzip_reference{
tag "$zipfasta"

input:
file zipfasta from ch_unzip_fasta
file zipped_fasta

output:
file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper_index)
file "*.fasta" into fasta_for_indexing

script:
"""
pigz -f -d -p ${task.cpus} $zipfasta
"""
}
} else {
Channel.fromPath("${params.fasta}")
.ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
.into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index}
fasta_for_indexing = file("${params.fasta}")
lastPath = params.fasta.lastIndexOf(File.separator)
bwa_base = params.fasta.substring(lastPath+1)
}

//Index files provided? Then check whether they are correct and complete
if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){
exit 1, "Invalid aligner option. Default is bwa, but specify --circularmapper or --bwamem to use these."
}
if( params.bwa_index && (params.aligner == 'bwa' | params.bwamem)){
bwa_index = Channel
.fromPath("${params.bwa_index}",checkIfExists: true)
.ifEmpty { exit 1, "BWA index not found: ${params.bwa_index}"}
.into{ch_bwa_index;ch_bwa_index_bwamem}
}
lastPath = params.bwa_index.lastIndexOf(File.separator)
bwa_dir = params.bwa_index.substring(0,lastPath+1)
bwa_base = params.bwa_index.substring(lastPath+1)

Channel
.fromPath(bwa_dir, checkIfExists: true)
.ifEmpty { exit 1, "BWA index directory not found: ${bwa_dir}" }
.into {bwa_index; bwa_index_bwamem}
}



//Validate that either pairedEnd or singleEnd has been specified by the user!
if( params.singleEnd || params.pairedEnd || params.bam){
Expand All @@ -280,12 +281,6 @@ if (params.skip_collapse && params.singleEnd){
exit 1, "--skip_collapse can only be set for pairedEnd samples!"
}

//AWSBatch sanity checking
if(workflow.profile == 'awsbatch'){
if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!"
}

//Strip mode sanity checking
if (params.strip_input_fastq){
if (!(['strip','replace'].contains(params.strip_mode))) {
Expand All @@ -311,11 +306,6 @@ if( workflow.profile == 'awsbatch') {
// Prevent trace files to be stored on S3 since S3 does not support rolling files.
if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
}

// Stage config files
ch_multiqc_config = Channel.fromPath(params.multiqc_config)
ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")

/*
* Create a channel for input read files
* Dump can be used for debugging purposes, e.g. using the -dump-channels operator on run
Expand Down Expand Up @@ -445,8 +435,7 @@ ${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style
* Create BWA indices if they are not present
*/

if(!params.bwa_index && params.fasta && (params.aligner =='bwa' || params.bwamem)) {

if(!params.bwa_index && params.fasta && (params.aligner == 'bwa' || params.bwamem)){
process makeBWAIndex {
tag {fasta}
publishDir path: "${params.outdir}/reference_genome/bwa_index", mode: 'copy', saveAs: { filename ->
Expand All @@ -455,26 +444,20 @@ process makeBWAIndex {
else null
}

when: !params.bwa_index && params.fasta && (params.aligner == 'bwa' || params.bwamem)

input:
file fasta from ch_fasta_for_bwa_indexing
file wherearemyfiles from ch_where_for_bwa_index
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "bwa_index" into (ch_bwa_index,ch_bwa_index_bwamem)
file "BWAIndex" into (bwa_index, bwa_index_bwamem)
file "where_are_my_files.txt"

script:
base = "${fasta.baseName}"
"""
mkdir bwa_index
mv "${fasta}" "bwa_index/${base}.fasta"
cd bwa_index
bwa index "${base}.fasta"
bwa index $fasta
mkdir BWAIndex && mv ${fasta}* BWAIndex
"""
}

}
}


Expand All @@ -491,20 +474,16 @@ process makeFastaIndex {
when: !params.fasta_index && params.fasta && params.aligner == 'bwa'

input:
file fasta from ch_fasta_for_faidx_indexing
file wherearemyfiles from ch_where_for_fasta_index
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "faidx/${base}.fasta.fai" into ch_fasta_faidx_index
file "faidx/${base}.fasta"
file "${base}.fasta.fai" into ch_fasta_faidx_index
file "where_are_my_files.txt"

script:
base = "${fasta.baseName}"
"""
mkdir faidx
mv $fasta "faidx/${base}.fasta"
cd faidx
samtools faidx "${base}.fasta"
"""
}
Expand All @@ -525,8 +504,8 @@ process makeSeqDict {
when: !params.seq_dict && params.fasta

input:
file fasta from ch_fasta_for_dict_indexing
file wherearemyfiles from ch_where_for_seqdict
file fasta from fasta_for_indexing
file where_are_my_files

output:
file "seq_dict/*.dict" into ch_seq_dict
Expand Down Expand Up @@ -722,8 +701,7 @@ process bwa {

input:
set val(name), file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa)

file index from ch_bwa_index.first()
file index from bwa_index


output:
Expand All @@ -732,13 +710,13 @@ process bwa {


script:
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''
fasta = "${index}/${bwa_base}"

//PE data without merging, PE data without any AR applied
if (!params.singleEnd && (params.skip_collapse || params.skip_adapterremoval)){
prefix = reads[0].toString().tokenize('.')[0]
"""
"""
bwa aln -t ${task.cpus} $fasta ${reads[0]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r1.sai
bwa aln -t ${task.cpus} $fasta ${reads[1]} -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.r2.sai
bwa sampe -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.r1.sai ${prefix}.r2.sai ${reads[0]} ${reads[1]} | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.sorted.bam
Expand All @@ -747,7 +725,7 @@ process bwa {
} else {
//PE collapsed, or SE data
prefix = reads[0].toString().tokenize('.')[0]
"""
"""
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f ${prefix}.sai
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta ${prefix}.sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
samtools index "${size}" "${prefix}".sorted.bam
Expand All @@ -767,18 +745,15 @@ process circulargenerator{
when: params.circularmapper

input:
file fasta from ch_fasta_for_circularmapper_index
file fasta from fasta_for_indexing

output:
file "cm_index" into ch_circularmapper_indices
file "${prefix}.{amb,ann,bwt,sa,pac}" into ch_circularmapper_indices

script:
prefix = "${fasta.baseName}_${params.circularextension}.fasta"
"""
mkdir cm_index
circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget}
cp "${fasta.baseName}_${params.circularextension}.fasta" cm_index/
cd cm_index
bwa index $prefix
"""

Expand All @@ -793,7 +768,8 @@ process circularmapper{

input:
set val(name), file(reads) from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm)
file index from ch_circularmapper_indices.first()
file index from ch_circularmapper_indices
file fasta from fasta_for_indexing

output:
file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm, ch_circular_mapped_reads_strip
Expand All @@ -802,7 +778,6 @@ process circularmapper{
script:
filter = "${params.circularfilter}" ? '' : '-f true -x false'

fasta = "${index}/*_*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse ){
Expand Down Expand Up @@ -836,16 +811,16 @@ process bwamem {

input:
set val(name), file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem)
file index from ch_bwa_index_bwamem.first()
file index from bwa_index_bwamem

output:
file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler, ch_bwamem_mapped_reads_strip
file "*.{bai,csi}"


script:
fasta = "${index}/${bwa_base}"
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
fasta = "${index}/*.fasta"
size = "${params.large_ref}" ? '-c' : ''

if (!params.singleEnd && params.skip_collapse){
Expand Down Expand Up @@ -1061,7 +1036,7 @@ process damageprofiler {

input:
file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm,ch_bwamem_mapped_reads_damageprofiler)
file fasta from ch_fasta_for_damageprofiler.first()
file fasta from fasta_for_indexing
file bai from ch_bam_index_for_damageprofiler


Expand All @@ -1088,7 +1063,7 @@ process qualimap {

input:
file bam from ch_bam_filtered_qualimap
file fasta from ch_fasta_for_qualimap.first()
file fasta from fasta_for_indexing

output:
file "*" into ch_qualimap_results
Expand Down Expand Up @@ -1152,7 +1127,7 @@ process pmdtools {

input:
file bam from ch_for_pmdtools
file fasta from ch_fasta_for_pmdtools
file fasta from fasta_for_indexing

output:
file "*.bam" into ch_bam_after_pmdfiltering
Expand Down Expand Up @@ -1237,7 +1212,7 @@ process output_documentation {
publishDir "${params.outdir}/Documentation", mode: 'copy'

input:
file output_docs from ch_output_docs
file output_docs

output:
file "results_description.html"
Expand Down Expand Up @@ -1291,7 +1266,7 @@ process multiqc {
publishDir "${params.outdir}/MultiQC", mode: 'copy'

input:
file multiqc_config from ch_multiqc_config.collect().ifEmpty([])
file multiqc_config
file ('fastqc_raw/*') from ch_fastqc_results.collect().ifEmpty([])
file('fastqc/*') from ch_fastqc_after_clipping.collect().ifEmpty([])
file ('software_versions/software_versions_mqc*') from software_versions_yaml.collect().ifEmpty([])
Expand All @@ -1307,7 +1282,7 @@ process multiqc {
file workflow_summary from create_workflow_summary(summary)

output:
file "*multiqc_report.html" into ch_multiqc_report
file "*multiqc_report.html" into multiqc_report
file "*_data"

script:
Expand Down