From 32cc237991a73f2025d01f6bf0eb80fadda8098d Mon Sep 17 00:00:00 2001 From: Simon Sadedin Date: Thu, 28 Aug 2014 21:50:38 +1000 Subject: [PATCH] fix inputs passed in wrong order to parallel stage after 'produce' when lexical sorting differs to input order --- src/main/groovy/bpipe/InputSplitter.groovy | 21 ++++++++++++++----- src/main/groovy/bpipe/PipelineCategory.groovy | 6 +++--- src/main/groovy/bpipe/PipelineContext.groovy | 2 +- tests/ordered_input_to_parallel/cleanup.sh | 1 + tests/ordered_input_to_parallel/run.sh | 7 +++++++ tests/ordered_input_to_parallel/test.1.fq.gz | 0 tests/ordered_input_to_parallel/test.2.fq.gz | 0 tests/ordered_input_to_parallel/test.groovy | 13 ++++++++++++ tests/ordered_input_to_parallel/touch | 0 9 files changed, 41 insertions(+), 9 deletions(-) create mode 100755 tests/ordered_input_to_parallel/cleanup.sh create mode 100755 tests/ordered_input_to_parallel/run.sh create mode 100644 tests/ordered_input_to_parallel/test.1.fq.gz create mode 100644 tests/ordered_input_to_parallel/test.2.fq.gz create mode 100644 tests/ordered_input_to_parallel/test.groovy create mode 100644 tests/ordered_input_to_parallel/touch diff --git a/src/main/groovy/bpipe/InputSplitter.groovy b/src/main/groovy/bpipe/InputSplitter.groovy index c9c3cf77..d5ab778b 100755 --- a/src/main/groovy/bpipe/InputSplitter.groovy +++ b/src/main/groovy/bpipe/InputSplitter.groovy @@ -1,9 +1,12 @@ package bpipe import groovy.util.logging.Log; + import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.naming.ldap.SortResponseControl; + /** * Implements logic for splitting input files into groups by sample using simple * wildcard patterns. 
The goal is to provide a simpler way for people to specify @@ -26,6 +29,8 @@ import java.util.regex.Pattern; */ @Log class InputSplitter { + + boolean sortResults = true /** * Splits the given inputs up according to the specified pattern @@ -85,11 +90,17 @@ class InputSplitter { unsortedResult[group] << inp } - // We now have all the inputs keyed on the part matching the split char, - // however we want to also sort them - Map sortedResult = [:] - unsortedResult.each { k,v -> - sortedResult[k] = this.sortNumericThenLexically(pattern, splitGroups, v) + if(sortResults) { + // We now have all the inputs keyed on the part matching the split char, + // however we want to also sort them + Map sortedResult = [:] + unsortedResult.each { k,v -> + sortedResult[k] = this.sortNumericThenLexically(pattern, splitGroups, v) + } + return sortedResult + } + else { + return unsortedResult } } diff --git a/src/main/groovy/bpipe/PipelineCategory.groovy b/src/main/groovy/bpipe/PipelineCategory.groovy index 668e8f7b..eaa15a52 100755 --- a/src/main/groovy/bpipe/PipelineCategory.groovy +++ b/src/main/groovy/bpipe/PipelineCategory.groovy @@ -156,7 +156,7 @@ class PipelineCategory { */ static Object plus(Closure other, List segments) { Pipeline pipeline = Pipeline.currentUnderConstructionPipeline - Closure mul = splitOnFiles("*", segments, false) + Closure mul = splitOnFiles("*", segments, false, false) def plusImplementation = { input1 -> def currentStage = new PipelineStage(Pipeline.currentRuntimePipeline.get().createContext(), other) @@ -331,7 +331,7 @@ class PipelineCategory { * @param requireMatch if true, the pipeline will fail if there are * no matches to the pattern */ - static Object splitOnFiles(def pattern, List segments, boolean requireMatch) { + static Object splitOnFiles(def pattern, List segments, boolean requireMatch, boolean sortResults=true) { Pipeline pipeline = Pipeline.currentRuntimePipeline.get() ?: Pipeline.currentUnderConstructionPipeline def multiplyImplementation = 
{ input -> @@ -339,7 +339,7 @@ class PipelineCategory { log.info "multiply on input $input with pattern $pattern" // Match the input - InputSplitter splitter = new InputSplitter() + InputSplitter splitter = new InputSplitter(sortResults:sortResults) Map samples = splitter.split(pattern, input) if(samples.isEmpty() && !requireMatch && pattern == "*") diff --git a/src/main/groovy/bpipe/PipelineContext.groovy b/src/main/groovy/bpipe/PipelineContext.groovy index 7b509600..be521b40 100755 --- a/src/main/groovy/bpipe/PipelineContext.groovy +++ b/src/main/groovy/bpipe/PipelineContext.groovy @@ -959,7 +959,7 @@ class PipelineContext { if(doExecute) { if(Utils.box(this.@output)) { - this.output = Utils.box(fixedOutputs) + Utils.box(this.@output) + this.output = Utils.box(fixedOutputs) + Utils.box(this.@output).grep { !(it in fixedOutputs) } this.output.removeAll { it in replacedOutputs || toOutputFolder(it) in replacedOutputs} } else { diff --git a/tests/ordered_input_to_parallel/cleanup.sh b/tests/ordered_input_to_parallel/cleanup.sh new file mode 100755 index 00000000..e889bb78 --- /dev/null +++ b/tests/ordered_input_to_parallel/cleanup.sh @@ -0,0 +1 @@ +rm -f *cutadapt.* diff --git a/tests/ordered_input_to_parallel/run.sh b/tests/ordered_input_to_parallel/run.sh new file mode 100755 index 00000000..11f28eef --- /dev/null +++ b/tests/ordered_input_to_parallel/run.sh @@ -0,0 +1,7 @@ +source ../testsupport.sh + +run + +grep -q "inputs should not be equal" test.out && err "Input referenced multiple times" + +true diff --git a/tests/ordered_input_to_parallel/test.1.fq.gz b/tests/ordered_input_to_parallel/test.1.fq.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/ordered_input_to_parallel/test.2.fq.gz b/tests/ordered_input_to_parallel/test.2.fq.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/ordered_input_to_parallel/test.groovy b/tests/ordered_input_to_parallel/test.groovy new file mode 100644 index 00000000..a6f3eed1 --- /dev/null +++ 
b/tests/ordered_input_to_parallel/test.groovy @@ -0,0 +1,13 @@ + +cutadapt = { + produce(["cutadapt.1.fq.gz", "cutadapt.2.fq.gz"]){ + exec "touch $output1; touch $output2; " + } +} + +tophat = { + if(input1.toString() == input2.toString()) + fail "inputs should not be equal" +} + +run { cutadapt + [tophat] } diff --git a/tests/ordered_input_to_parallel/touch b/tests/ordered_input_to_parallel/touch new file mode 100644 index 00000000..e69de29b