Merge pull request #83 from nf-core/caching

Fix caching problems
nf-core · Jan 30, 2024 · 9999e23 · 9999e23
2 parents 6268eb5 + cf629ae
commit 9999e23
Show file tree

Hide file tree

Showing 26 changed files with 80 additions and 140 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -67,4 +67,4 @@ jobs:
 
       - name: Run pipeline with test data
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_cache,docker --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets
diff --git a/bin/circRNA_counts_matrix.py b/bin/circRNA_counts_matrix.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import sys, glob
 from collections import defaultdict
 

diff --git a/bin/reformat_count_matrix.R b/bin/reformat_count_matrix.R
@@ -4,7 +4,7 @@
 ## License: MIT
 
 library(dplyr)
-mat <- read.table("circRNA_matrix.txt", sep="\t", header=T, stringsAsFactors=F)
+mat <- read.table("circRNA_matrix.txt", sep="\t", header=T, stringsAsFactors=F, check.names=F)
 mat$ID <- with(mat, paste0(Chr, sep=":", Start, sep="-", Stop, sep=":", Strand))
 mat <- mat[,-c(1:4)]
 mat1 <- mat %>% select(ID, everything())

diff --git a/conf/test.config b/conf/test.config
@@ -19,16 +19,16 @@ params {
     max_memory                 = 6.GB
     max_time                   = 6.h
 
-    // Input data for test data
-    input                      = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/samples.csv'
-    fasta                      = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/chrI.fa'
-    gtf                        = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/chrI.gtf'
-    mature                     = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/mature.fa'
-    species                    = 'cel'
-    tool                       = 'circexplorer2'
-    phenotype                  = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/phenotype.csv'
+    // Test input data
+    input                      = "${params.test_data_base}/samples.csv"
+    fasta                      = "${params.test_data_base}/reference/chrI.fa"
+    gtf                        = "${params.test_data_base}/reference/chrI.gtf"
+    mature                     = "${params.test_data_base}/reference/mature.fa"
+    tool                       = "circexplorer2"
+    phenotype                  = "${params.test_data_base}/phenotype.csv"
     skip_trimming              = false
-    module                     = 'circrna_discovery,mirna_prediction,differential_expression'
-    outdir                     = 'results/'
+    module                     = "circrna_discovery,mirna_prediction,differential_expression"
+    outdir                     = "results/"
     bsj_reads                  = 2
+    species                    = "cel"
 }
diff --git a/conf/test_cache.config b/conf/test_cache.config
diff --git a/modules/local/annotation/full_annotation/main.nf b/modules/local/annotation/full_annotation/main.nf
@@ -33,7 +33,7 @@ process ANNOTATION {
     annotate_outputs.sh $exon_boundary &> ${prefix}.log
     mv master_bed12.bed ${prefix}.bed.tmp
 
-    awk -v FS="\t" '{print \$11}' ${prefix}.bed.tmp > mature_len.tmp
+    awk -v FS="\\t" '{print \$11}' ${prefix}.bed.tmp > mature_len.tmp
     awk -v FS="," '{for(i=t=0;i<NF;) t+=\$++i; \$0=t}1' mature_len.tmp > mature_length
 
     paste ${prefix}.bed.tmp mature_length > ${prefix}.bed

diff --git a/modules/local/annotation/parent_gene/main.nf b/modules/local/annotation/parent_gene/main.nf
@@ -35,14 +35,14 @@ process PARENT_GENE {
         start=\$(echo \$line | cut -d- -f1 | cut -d: -f2)
         stop=\$(echo \$line | cut -d- -f2 | cut -d: -f1)
         sign=\$(echo \$line | cut -d: -f3)
-        echo -e "\$chr\t\$start\t\$stop\t\$name\t0\t\$sign" >> \${name}.bed
+        echo -e "\$chr\\t\$start\\t\$stop\\t\$name\\t0\\t\$sign" >> \${name}.bed
     done < IDs.txt
 
     cat *.bed > merged.txt && rm IDs.txt && rm *.bed && mv merged.txt circs.bed
 
     # Re-use annotation script to identify the host gene.
     annotate_outputs.sh $exon_boundary &> annotation.log
-    awk -v OFS="\t" '{print \$4, \$14}' master_bed12.bed > circrna_host-gene.txt
+    awk -v OFS="\\t" '{print \$4, \$14}' master_bed12.bed > circrna_host-gene.txt
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/circexplorer2/filter/main.nf b/modules/local/circexplorer2/filter/main.nf
@@ -24,9 +24,9 @@ process CIRCEXPLORER2_FILTER {
     prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = '1.3.4'
     """
-    awk '{if(\$13 >= ${bsj_reads}) print \$0}' ${prefix}.txt | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6,\$13}' > ${prefix}_${meta.tool}.bed
+    awk '{if(\$13 >= ${bsj_reads}) print \$0}' ${prefix}.txt | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$6,\$13}' > ${prefix}_${meta.tool}.bed
 
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_${meta.tool}.bed > ${prefix}_${meta.tool}_circs.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_${meta.tool}.bed > ${prefix}_${meta.tool}_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/circexplorer2/reference/main.nf b/modules/local/circexplorer2/reference/main.nf
@@ -27,7 +27,7 @@ process CIRCEXPLORER2_REFERENCE {
         $gtf \
         ${prefix}.genepred
 
-    awk -v OFS="\t" '{print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.txt
+    awk -v OFS="\\t" '{print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.txt
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/circrna_finder/filter/main.nf b/modules/local/circrna_finder/filter/main.nf
@@ -29,9 +29,9 @@ process CIRCRNA_FINDER_FILTER {
     mkdir -p star_dir && mv *.tab *.junction *.sam star_dir
     postProcessStarAlignment.pl --starDir star_dir/ --outDir ./
 
-    awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}.filteredJunctions.bed | awk  -v OFS="\t" -F"\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_circrna_finder.bed
+    awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}.filteredJunctions.bed | awk  -v OFS="\\t" -F"\\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_circrna_finder.bed
 
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_circrna_finder.bed > ${prefix}_circrna_finder_circs.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_circrna_finder.bed > ${prefix}_circrna_finder_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/ciriquant/filter/main.nf b/modules/local/ciriquant/filter/main.nf
@@ -24,16 +24,16 @@ process CIRIQUANT_FILTER {
     def VERSION = '1.3.4'
     """
     grep -v "#" ${prefix}.gtf | awk '{print \$14}' | cut -d '.' -f1 > counts
-    grep -v "#" ${prefix}.gtf | awk -v OFS="\t" '{print \$1,\$4,\$5,\$7}' > ${prefix}.tmp
+    grep -v "#" ${prefix}.gtf | awk -v OFS="\\t" '{print \$1,\$4,\$5,\$7}' > ${prefix}.tmp
     paste ${prefix}.tmp counts > ${prefix}_unfilt.bed
 
     awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}_unfilt.bed > ${prefix}_filt.bed
     grep -v '^\$' ${prefix}_filt.bed > ${prefix}_ciriquant
 
-    awk -v OFS="\t" '{\$2-=1;print}' ${prefix}_ciriquant > ${prefix}_ciriquant.bed
+    awk -v OFS="\\t" '{\$2-=1;print}' ${prefix}_ciriquant > ${prefix}_ciriquant.bed
     rm ${prefix}.gtf
 
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_ciriquant.bed > ${prefix}_ciriquant_circs.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_ciriquant.bed > ${prefix}_ciriquant_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/count_matrix/combined/main.nf b/modules/local/count_matrix/combined/main.nf
@@ -20,20 +20,18 @@ process COUNTS_COMBINED {
     script:
     def args = task.ext.args ?: ''
     """
-    python ${workflow.projectDir}/bin/circRNA_counts_matrix.py > matrix.txt
+    circRNA_counts_matrix.py > matrix.txt
     ## handle non-canon chromosomes here (https://stackoverflow.com/questions/71479919/joining-columns-based-on-number-of-fields)
     n_samps=\$(ls *.bed | wc -l)
     canon=\$(awk -v a="\$n_samps" 'BEGIN {print a + 4}')
-    awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\t" '\$1=\$1' > circRNA_matrix.txt
-    Rscript ${workflow.projectDir}/bin/reformat_count_matrix.R
+    awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\\t" '\$1=\$1' > circRNA_matrix.txt
+    reformat_count_matrix.R
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         awk: \$(awk --version | head -n 1 | cut -d' ' -f3 | sed 's/,//g')
         r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
-        argparser: \$(Rscript -e "library(arparser); cat(as.character(packageVersion('argparser')))")
         dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
-        python: \$(python --version | sed -e 's/Python //g')
     END_VERSIONS
     """
 }
diff --git a/modules/local/count_matrix/merge_tools/main.nf b/modules/local/count_matrix/merge_tools/main.nf
@@ -31,7 +31,7 @@ process MERGE_TOOLS {
 
     ## Use intersection of "n" (params.tool_filter) circRNAs called by tools
     ## remove duplicate IDs, keep highest count.
-    Rscript ${workflow.projectDir}/bin/consolidate_algorithms_intersection.R samples.csv $tool_filter $duplicates_fun
+    consolidate_algorithms_intersection.R samples.csv $tool_filter $duplicates_fun
     mv combined_counts.bed ${prefix}.bed
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/count_matrix/single/main.nf b/modules/local/count_matrix/single/main.nf
@@ -33,7 +33,7 @@ process COUNTS_SINGLE {
     ## handle non-canon chromosomes here (https://stackoverflow.com/questions/71479919/joining-columns-based-on-number-of-fields)
     n_samps=\$(ls *.bed | wc -l)
     canon=\$(awk -v a="\$n_samps" 'BEGIN {print a + 4}')
-    awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\t" '\$1=\$1' > circRNA_matrix.txt
+    awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\\t" '\$1=\$1' > circRNA_matrix.txt
     Rscript ${workflow.projectDir}/bin/reformat_count_matrix.R
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/dcc/dcc/main.nf b/modules/local/dcc/dcc/main.nf
@@ -31,7 +31,7 @@ process DCC {
         DCC @samplesheet -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus}
 
         awk '{print \$6}' CircCoordinates >> strand
-        paste CircRNACount strand | tail -n +2 | awk -v OFS="\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt
+        paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
@@ -49,7 +49,7 @@ process DCC {
         DCC @samplesheet -mt1 @mate1file -mt2 @mate2file -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus}
 
         awk '{print \$6}' CircCoordinates >> strand
-        paste CircRNACount strand | tail -n +2 | awk -v OFS="\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt
+        paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":

diff --git a/modules/local/dcc/filter/main.nf b/modules/local/dcc/filter/main.nf
@@ -24,8 +24,8 @@ process DCC_FILTER {
     def VERSION = '1.3.4'
     """
     awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}.txt > ${prefix}_dcc.filtered
-    awk -v OFS="\t" '{\$2-=1;print}' ${prefix}_dcc.filtered > ${prefix}_dcc.bed
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_dcc.bed > ${prefix}_dcc_circs.bed
+    awk -v OFS="\\t" '{\$2-=1;print}' ${prefix}_dcc.filtered > ${prefix}_dcc.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_dcc.bed > ${prefix}_dcc_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/fasta/main.nf b/modules/local/fasta/main.nf
@@ -9,7 +9,7 @@ process FASTA {
 
     input:
     tuple val(meta), path(bed)
-    path fasta
+    path(fasta, stageAs: 'fasta.input') // TODO: Remove input renaming, currently necessary to prevent problems with the backsplice_gen.sh script
 
     output:
     tuple val(meta), path("${prefix}.fa"), emit: analysis_fasta
@@ -24,15 +24,14 @@ process FASTA {
     prefix = task.ext.prefix ?: "${meta.id}"
     """
     ## FASTA sequences (bedtools does not like the extra annotation info - split will not work properly)
-    cut -d\$'\t' -f1-12 ${prefix}.bed > bed12.tmp
+    cut -d\$'\\t' -f1-12 $bed > bed12.tmp
     bedtools getfasta -fi $fasta -bed bed12.tmp -s -split -name > circ_seq.tmp
 
     ## clean fasta header
     grep -A 1 '>' circ_seq.tmp | cut -d: -f1,2,3 > ${prefix}.fa && rm circ_seq.tmp
 
     ## add backsplice sequence for miRanda Targetscan, publish canonical FASTA to results.
-    rm $fasta
-    bash ${workflow.projectDir}/bin/backsplice_gen.sh ${prefix}.fa
+    backsplice_gen.sh ${prefix}.fa
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/find_circ/filter/main.nf b/modules/local/find_circ/filter/main.nf
@@ -30,9 +30,9 @@ process FIND_CIRC_FILTER {
         maxlength.py 100000 \
         > ${prefix}.txt
 
-    tail -n +2 ${prefix}.txt | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_find_circ.bed
+    tail -n +2 ${prefix}.txt | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_find_circ.bed
 
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_find_circ.bed > ${prefix}_find_circ_circs.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_find_circ.bed > ${prefix}_find_circ_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/mirna_targets/main.nf b/modules/local/mirna_targets/main.nf
@@ -22,8 +22,8 @@ process MIRNA_TARGETS {
     prefix = task.ext.prefix ?: "${meta.id}"
     """
     ## reformat and sort miRanda, TargetScan outputs, convert to BED for overlaps.
-    tail -n +2 $targetscan | sort -k1,1 -k4n | awk -v OFS="\t" '{print \$1, \$2, \$4, \$5, \$9}' | awk -v OFS="\t" '{print \$2, \$3, \$4, \$1, "0", \$5}' > targetscan.bed
-    tail -n +2 $miranda | sort -k2,2 -k7n | awk -v OFS="\t" '{print \$2, \$1, \$3, \$4, \$7, \$8}' | awk -v OFS="\t" '{print \$2, \$5, \$6, \$1, \$3, \$4}' | sed 's/^[^-]*-//g' > miranda.bed
+    tail -n +2 $targetscan | sort -k1,1 -k4n | awk -v OFS="\\t" '{print \$1, \$2, \$4, \$5, \$9}' | awk -v OFS="\\t" '{print \$2, \$3, \$4, \$1, "0", \$5}' > targetscan.bed
+    tail -n +2 $miranda | sort -k2,2 -k7n | awk -v OFS="\\t" '{print \$2, \$1, \$3, \$4, \$7, \$8}' | awk -v OFS="\\t" '{print \$2, \$5, \$6, \$1, \$3, \$4}' | sed 's/^[^-]*-//g' > miranda.bed
 
     ## intersect, consolidate miRanda, TargetScan information about miRs.
     ## -wa to output miRanda hits - targetscan makes it difficult to resolve duplicate miRNAs at MRE sites.
@@ -32,8 +32,8 @@ process MIRNA_TARGETS {
 
     ## remove duplicate miRNA entries at MRE sites.
     ## strategy: sort by circs, sort by start position, sort by site type - the goal is to take the best site type (i.e. rank site type found at MRE site).
-    paste ${prefix}.mirnas.tmp mirna_type | sort -k3,3 -k2n -k7r | awk -v OFS="\t" '{print \$4,\$1,\$2,\$3,\$5,\$6,\$7}' | awk -F "\t" '{if (!seen[\$1,\$2,\$3,\$4,\$5,\$6]++)print}' | sort -k1,1 -k3n > ${prefix}.mirna_targets.tmp
-    echo -e "circRNA\tmiRNA\tStart\tEnd\tScore\tEnergy_KcalMol\tSite_type" | cat - ${prefix}.mirna_targets.tmp > ${prefix}.mirna_targets.txt
+    paste ${prefix}.mirnas.tmp mirna_type | sort -k3,3 -k2n -k7r | awk -v OFS="\\t" '{print \$4,\$1,\$2,\$3,\$5,\$6,\$7}' | awk -F "\\t" '{if (!seen[\$1,\$2,\$3,\$4,\$5,\$6]++)print}' | sort -k1,1 -k3n > ${prefix}.mirna_targets.tmp
+    echo -e "circRNA\\tmiRNA\\tStart\\tEnd\\tScore\\tEnergy_KcalMol\\tSite_type" | cat - ${prefix}.mirna_targets.tmp > ${prefix}.mirna_targets.txt
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/segemehl/filter/main.nf b/modules/local/segemehl/filter/main.nf
@@ -24,11 +24,11 @@ process SEGEMEHL_FILTER {
     prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = '1.3.4'
     """
-    grep ';C;' ${prefix}.sngl.bed | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6}' | sort | uniq -c | awk -v OFS="\t" '{print \$2,\$3,\$4,\$5,\$1}' > ${prefix}_collapsed.bed
+    grep ';C;' ${prefix}.sngl.bed | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$6}' | sort | uniq -c | awk -v OFS="\\t" '{print \$2,\$3,\$4,\$5,\$1}' > ${prefix}_collapsed.bed
 
-    awk -v OFS="\t" -v BSJ=${bsj_reads} '{if(\$5>=BSJ) print \$0}' ${prefix}_collapsed.bed > ${prefix}_segemehl.bed
+    awk -v OFS="\\t" -v BSJ=${bsj_reads} '{if(\$5>=BSJ) print \$0}' ${prefix}_collapsed.bed > ${prefix}_segemehl.bed
 
-    awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_segemehl.bed > ${prefix}_segemehl_circs.bed
+    awk -v OFS="\\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_segemehl.bed > ${prefix}_segemehl_circs.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/stringtie/prepde/main.nf b/modules/local/stringtie/prepde/main.nf
@@ -19,7 +19,7 @@ process STRINGTIE_PREPDE {
 
     script:
     """
-    for file in \$(ls *.gtf); do sample_id=\${file%".transcripts.gtf"}; touch samples.txt; printf "\$sample_id\t\$file\\n" >> samples.txt ; done
+    for file in \$(ls *.gtf); do sample_id=\${file%".transcripts.gtf"}; touch samples.txt; printf "\$sample_id\\t\$file\\n" >> samples.txt ; done
 
     prepDE.py -i samples.txt
 

diff --git a/modules/local/targetscan/predict/main.nf b/modules/local/targetscan/predict/main.nf
@@ -25,7 +25,7 @@ process TARGETSCAN {
     ##format for targetscan
     cat $fasta | grep ">" | sed 's/>//g' > id
     cat $fasta | grep -v ">" > seq
-    paste id seq | awk -v OFS="\t" '{print \$1, "0000", \$2}' > ${prefix}_ts.txt
+    paste id seq | awk -v OFS="\\t" '{print \$1, "0000", \$2}' > ${prefix}_ts.txt
     # run targetscan
     targetscan_70.pl mature.txt ${prefix}_ts.txt ${prefix}.txt
 

diff --git a/nextflow.config b/nextflow.config
@@ -94,10 +94,12 @@ params {
     max_cpus                   = 50
     max_time                   = '240.h'
 
+    test_data_base             = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna'
+
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode            = false
-    validationSchemaIgnoreParams     = 'genomes,igenomes_base'
+    validationSchemaIgnoreParams     = 'genomes,igenomes_base,test_data_base'
     validationShowHiddenParams       = false
     validate_params                  = true
 }
@@ -211,7 +213,6 @@ profiles {
         executor.memory        = 8.GB
     }
     test          { includeConfig 'conf/test.config'          }
-    test_cache    { includeConfig 'conf/test_cache.config'    }
     test_igenomes { includeConfig 'conf/test_igenomes.config' }
     full          { includeConfig 'conf/full.config'          }
     test_full     { includeConfig 'conf/test_full.config'     }