diff --git a/_viash.yaml b/_viash.yaml index 71d5d0ddda4..a69c58b5527 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,4 +1,4 @@ -viash_version: 0.7.5 +viash_version: 0.8.0 source: src target: target diff --git a/workflows/ingestion/demux/config.vsh.yaml b/src/workflows/ingestion/demux/config.vsh.yaml similarity index 66% rename from workflows/ingestion/demux/config.vsh.yaml rename to src/workflows/ingestion/demux/config.vsh.yaml index f58346cbf4d..2718fa674f1 100644 --- a/workflows/ingestion/demux/config.vsh.yaml +++ b/src/workflows/ingestion/demux/config.vsh.yaml @@ -1,6 +1,6 @@ functionality: name: demux - namespace: ingestion + namespace: workflows/ingestion description: | Convert `.bcl` files to `.fastq` files using bcl2fastq, bcl-convert or Cell Ranger mkfastq. info: @@ -24,13 +24,11 @@ functionality: description: ID of the sample. example: foo - name: "--input" - alternatives: [ "-i" ] type: file required: true description: Input run directory example: bcl_dir - name: "--sample_sheet" - alternatives: [ "-s" ] type: file required: true description: Pointer to the sample sheet @@ -43,11 +41,38 @@ functionality: - name: "--ignore_missing" type: boolean description: Should the demultiplexer ignore missing entities (filter, ...) + - name: "--output_fastq" + type: file + direction: output + required: true + description: Output directory containig fastq files + example: fastq_dir + - name: "--output_fastqc" + type: file + direction: output + required: false + description: Reports directory produced by FastQC + example: reports_dir + - name: "--output_multiqc" + type: file + direction: output + required: false + description: Reports directory produced by MultiQC + example: reports_dir resources: - type: nextflow_script path: main.nf + entrypoint: run_wf test_resources: - type: nextflow_script - path: main.nf + path: test.nf entrypoint: test_wf - path: /resources_test/cellranger_tiny_bcl + dependencies: + - name: demux/cellranger_mkfastq + - name: demux/bcl_convert + - name: demux/bcl2fastq + - name: qc/fastqc + - name: qc/multiqc +platforms: + - type: nextflow \ No newline at end of file diff --git a/workflows/ingestion/demux/integration_test.sh b/src/workflows/ingestion/demux/integration_test.sh similarity index 86% rename from workflows/ingestion/demux/integration_test.sh rename to src/workflows/ingestion/demux/integration_test.sh index a2ccff1fcfb..afb8bc35d29 100755 --- a/workflows/ingestion/demux/integration_test.sh +++ b/src/workflows/ingestion/demux/integration_test.sh @@ -12,7 +12,7 @@ export NXF_VER=21.10.6 nextflow \ run . \ - -main-script workflows/ingestion/demux/main.nf \ + -main-script src/workflows/ingestion/demux/test.nf \ -entry test_wf \ -resume \ -profile docker,no_publish \ diff --git a/src/workflows/ingestion/demux/main.nf b/src/workflows/ingestion/demux/main.nf new file mode 100644 index 00000000000..fb989048b79 --- /dev/null +++ b/src/workflows/ingestion/demux/main.nf @@ -0,0 +1,60 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + // translate the demultiplexer argument to the component name that needs to be run + | map { id, state -> + def funcNameMapper = [ + bclconvert: "bcl_convert", + bcl2fastq: "bcl2fastq", + mkfastq: "cellranger_mkfastq" + ] + def funcName = funcNameMapper[state.demultiplexer] + def newState = state + [funcName: funcName] + [id, newState] + } + + // run the demultiplexers + | runComponents( + components: [cellranger_mkfastq, bcl_convert, bcl2fastq], + filter: { id, state, config -> + state.funcName == config.functionality.name + }, + fromState: { id, state, config -> + def data = [ + input: state.input, + sample_sheet: state.sample_sheet, + reports: null // disable reports so they end up in the output dir + ] + if (config.functionality.name == "bcl2fastq") { + data.ignore_missing = state.ignore_missing + } + data + }, + toState: ["output_fastq": "output"] + ) + + // run fastqc + | fastqc.run( + fromState: ["input": "output_fastq"], + args: [mode: "dir"], + toState: ["output_fastqc": "output"] + ) + + // run multiqc + | multiqc.run( + fromState: { id, state -> + ["input": [state.output_fastq]] + }, + toState: ["output_multiqc": "output"] + ) + + // subset state to the outputs + | setState(["output_fastq", "output_fastqc", "output_multiqc"]) + + emit: + output_ch +} diff --git a/workflows/ingestion/demux/nextflow.config b/src/workflows/ingestion/demux/nextflow.config similarity index 68% rename from workflows/ingestion/demux/nextflow.config rename to src/workflows/ingestion/demux/nextflow.config index 425fb92fede..956cffd9e95 100644 --- a/workflows/ingestion/demux/nextflow.config +++ b/src/workflows/ingestion/demux/nextflow.config @@ -3,7 +3,7 @@ manifest { } params { - rootDir = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString() + rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString() } // include common settings diff --git a/src/workflows/ingestion/demux/test.nf b/src/workflows/ingestion/demux/test.nf new file mode 100644 index 00000000000..8fc780cde58 --- /dev/null +++ b/src/workflows/ingestion/demux/test.nf @@ -0,0 +1,57 @@ +nextflow.enable.dsl=2 + +// TODO: once viash supports 'viash test' on this workflow, +// - the import will be added automatically +// - access the test resources with `meta.resources_dir' instead of `params.resources_test' + +include { demux } from params.rootDir + "/target/nextflow/workflows/ingestion/demux/main.nf" + +workflow test_wf { + // TODO: change this to `resources_test = meta.resources_dir` + resources_test = file("${params.rootDir}/resources_test") + + // or when running from s3: + Channel.fromList([ + [ + id: "mkfastq_test", + input: resources_test.resolve("cellranger_tiny_bcl/bcl"), + sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"), + demultiplexer: "mkfastq" + ], + [ + id: "bclconvert_test", + input: resources_test.resolve("cellranger_tiny_bcl/bcl2/"), + sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl2/sample_sheet.csv"), + demultiplexer: "bclconvert" + ], + [ + id: "bcl2fastq_test", + input: resources_test.resolve("cellranger_tiny_bcl/bcl"), + sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"), + demultiplexer: "bcl2fastq", + ignore_missing: true + ] + ]) + | map{ state -> [state.id, state] } + | demux + | view { output -> + assert output.size() == 2 : "outputs should contain two elements; [id, state]" + + def id = output[0] + assert id.contains("_test") + + def state = output[1] + assert state.containsKey("output_fastq") : "State should contain output_fastq" + assert state.output_fastq.isDirectory() : "output_fastq should be a directory." + assert state.containsKey("output_fastqc") : "State should contain output_fastqc" + assert state.output_fastqc.isDirectory() : "output_fastqc should be a directory." + assert state.containsKey("output_multiqc") : "State should contain output_multiqc" + assert state.output_multiqc.isDirectory() : "output_multiqc should be a directory." + + "Output: $output" + } + | toSortedList() + | map { output_list -> + assert output_list.size() == 3 : "There should be three outputs" + } +} diff --git a/workflows/ingestion/demux/main.nf b/workflows/ingestion/demux/main.nf deleted file mode 100644 index 757ac1cf010..00000000000 --- a/workflows/ingestion/demux/main.nf +++ /dev/null @@ -1,125 +0,0 @@ -nextflow.enable.dsl=2 - -workflowDir = params.rootDir + "/workflows" -targetDir = params.rootDir + "/target/nextflow" - -include { cellranger_mkfastq } from targetDir + "/demux/cellranger_mkfastq/main.nf" -include { bcl_convert } from targetDir + "/demux/bcl_convert/main.nf" -include { bcl2fastq } from targetDir + "/demux/bcl2fastq/main.nf" -include { fastqc } from targetDir + "/qc/fastqc/main.nf" -include { multiqc } from targetDir + "/qc/multiqc/main.nf" - -include { readConfig; channelFromParams; preprocessInputs; helpMessage } from workflowDir + "/utils/WorkflowHelper.nf" -include { passthroughMap as pmap } from workflowDir + "/utils/DataflowHelper.nf" - -config = readConfig("$workflowDir/ingestion/demux/config.vsh.yaml") - -workflow { - helpMessage(config) - - channelFromParams(params, config) - | run_wf -} - -workflow run_wf { - take: - input_ch - - main: - commonOptions = [ - args: [ output: "fastq/\$id" ], - auto: [ publish: true ] - ] - preprocessed_ch = input_ch - | preprocessInputs("config": config) - - mkfastq_ch = preprocessed_ch - | filter{ it[1].demultiplexer == "mkfastq" } - | cellranger_mkfastq.run(commonOptions) - - bcl_convert_ch = preprocessed_ch - | filter{ it[1].demultiplexer == "bclconvert" } - | bcl_convert.run(commonOptions) - - bcl2fastq_ch = preprocessed_ch - | filter{ it[1].demultiplexer == "bcl2fastq" } - | bcl2fastq.run(commonOptions) - - /* Combine the different demultiplexer channels */ - all_ch = - mkfastq_ch - | mix( bcl_convert_ch, bcl2fastq_ch ) - | map { tup -> - [tup[0], tup[1].output] - } - - /* Generate fastqc reports for every sample */ - all_ch - | fastqc.run( - [ - args: [ mode: "dir", output: "fastqc/\$id" ], - auto: [ publish: true ] - ] - ) - - /* Generate multiqc report */ - all_ch - | map{ it[1] } - | toSortedList - | map{ [ "multiqc", it ] } - | multiqc.run( - args: [ output: "multiqc/report" ], - auto: [ publish: true ] - ) - - output_ch = all_ch - | map { tup -> - [tup[0], ["output": tup[1]]] - } - - emit: - output_ch -} - -workflow test_wf { - // allow changing the resources_test dir - params.resources_test = params.rootDir + "/resources_test" - - // or when running from s3: params.resources_test = "s3://openpipelines-data/" - params.param_list = [ - [ - id: "mkfastq_test", - input: params.resources_test + "/cellranger_tiny_bcl/bcl", - sample_sheet: params.resources_test + "/cellranger_tiny_bcl/bcl/sample_sheet.csv", - demultiplexer: "mkfastq" - ], - [ - id: "bclconvert_test", - input: params.resources_test + "/cellranger_tiny_bcl/bcl2/", - sample_sheet: params.resources_test + "/cellranger_tiny_bcl/bcl2/sample_sheet.csv", - demultiplexer: "bclconvert" - ], - [ - id: "bcl2fastq_test", - input: params.resources_test + "/cellranger_tiny_bcl/bcl", - sample_sheet: params.resources_test + "/cellranger_tiny_bcl/bcl/sample_sheet.csv", - demultiplexer: "bcl2fastq", - ignore_missing: true - ] - ] - - output_ch = - channelFromParams(params, config) - | view{ "Input: $it" } - | run_wf - | view { output -> - assert output.size() == 2 : "outputs should contain two elements; [id, file]" - assert output[1].output.isDirectory() : "Output path should be a directory." - // todo: check whether output dir contains fastq files - "Output: $output" - } - | toList() - | map { output_list -> - assert output_list.size() == 3 : "There should be three outputs" - } -}