Skip to content

Commit

Permalink
Performance enhancements for pipelines with many output files - mainly eliminating long log file lines and caching canonical path lookups
Browse files Browse the repository at this point in the history
  • Loading branch information
ssadedin committed Aug 23, 2015
1 parent 953b50e commit 8855f29
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
9 changes: 5 additions & 4 deletions src/main/groovy/bpipe/Dependencies.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class GraphEntry {
@CompileStatic
GraphEntry entryFor(File outputFile) {
// In case of non-default output directory, the outputFile itself may be in a directory
final String outputFilePath = outputFile.canonicalPath
final String outputFilePath = Utils.canonicalFileFor(outputFile.path).path
return entryForCanonicalPath(outputFilePath)
}

Expand All @@ -123,7 +123,7 @@ class GraphEntry {
if(p.containsKey("canonicalPath"))
return p["canonicalPath"]

p["canonicalPath"] = ((File)p["outputFile"]).canonicalPath
p["canonicalPath"] = Utils.canonicalFileFor(((File)p["outputFile"]).path).path
}
}

Expand All @@ -132,7 +132,7 @@ class GraphEntry {
*/
Properties propertiesFor(String outputFile) {
// In case of non-default output directory, the outputFile itself may be in a directory
String outputFilePath = new File(outputFile).canonicalPath
String outputFilePath = Utils.canonicalFileFor(outputFile).path
def values = entryForCanonicalPath(outputFilePath)?.values
if(!values)
return null
Expand Down Expand Up @@ -798,7 +798,8 @@ class Dependencies {
// Find all entries with inputs that are not outputs of any other entry
def outputsWithExternalInputs = outputs.grep { p -> ! p.inputs.any { allOutputs.contains(it) } }

log.info "External inputs: " + outputsWithExternalInputs*.inputs + " for outputs " + outputsWithExternalInputs*.outputPath
// NOTE: turning this log on can be expensive for large numbers of inputs and outputs
// log.info "External inputs: " + outputsWithExternalInputs*.inputs + " for outputs " + outputsWithExternalInputs*.outputPath

handledOutputs.addAll(outputsWithExternalInputs)

Expand Down
3 changes: 2 additions & 1 deletion src/main/groovy/bpipe/PipelineCategory.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,8 @@ class PipelineCategory {
new PipelineContext(null, parent.stages, joiners, new Branch(name:'all'))
def mergedOutputs = finalStages.collect { s ->
Utils.box(s?.context?.nextInputs ?: s?.context?.@output)
}.sum().unique { new File(it).canonicalPath }
}.sum().collect { Utils.canonicalFileFor(it).path }.unique()

log.info "Last merged outputs are $mergedOutputs"
mergedContext.setRawOutput(mergedOutputs)
PipelineStage mergedStage = new PipelineStage(mergedContext, finalStages.find { it != null }.body)
Expand Down

0 comments on commit 8855f29

Please sign in to comment.