diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa2cb82..25934400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The pipeline is using Nextflow DSL2, each process will be run with its own [Bioc | `percolator` | 3.5 | | `pmultiqc` | 0.0.10 | | `luciphor` | 2020_04_03 | -| `dia-nn` | 1.8 | +| `dia-nn` | 1.8.1 | +| `msstats` | 4.2.0 | ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index ebcaaf58..aeb9d30e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,7 +41,7 @@ process { // OPENMSPEAKPICKER if (params.openms_peakpicking){ process { - withName: 'NFCORE_QUANTMS:QUANTMS:.*:OPENMSPEAKPICKER' { + withName: '.*:OPENMSPEAKPICKER' { publishDir = [ path: { "${params.outdir}/openmspeakpicker" }, mode: params.publish_dir_mode, @@ -55,7 +55,7 @@ if (params.openms_peakpicking){ // DECOYDATABASE if (params.add_decoys) { process { - withName: 'NFCORE_QUANTMS:QUANTMS:.*:DECOYDATABASE' { + withName: '.*:DECOYDATABASE' { publishDir = [ path: { "${params.outdir}/decoydatabase" }, mode: params.publish_dir_mode, @@ -69,7 +69,7 @@ if (params.add_decoys) { if (params.posterior_probabilities == "percolator") { process { // EXTRACTPSMFEATURE - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' { + withName: '.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' { publishDir = [ path: { "${params.outdir}/extractpsmfeature" }, mode: params.publish_dir_mode, @@ -79,14 +79,14 @@ if (params.posterior_probabilities == "percolator") { } //PERCOLATOR - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:PERCOLATOR' { + withName: '.*:ID:PSMRESCORING:PERCOLATOR' { ext.args = "-debug $params.percolator_debug" } } } else { process { // IDPEP - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:IDPEP' { + withName: '.*:ID:PSMRESCORING:IDPEP' { ext.args = "-debug $params.idpep_debug" } } @@ -96,7 +96,7 @@ if (params.search_engines.split(",").size() == 1) { process { // IDSCORESWITCHER - withName: 
'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMFDRCONTROL:IDSCORESWITCHER' { + withName: '.*:ID:PSMFDRCONTROL:IDSCORESWITCHER' { ext.args = [ "-new_score_orientation lower_better", "-old_score \"Posterior Error Probability\"", @@ -116,7 +116,7 @@ if (params.enable_mod_localization) { process { // IDSCORESWITCHERFORLUCIPHOR - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:IDSCORESWITCHERFORLUCIPHOR' { + withName: '.*:ID:PHOSPHOSCORING:IDSCORESWITCHERFORLUCIPHOR' { ext.args = [ "-new_score_orientation lower_better", "-old_score \"q-value\"", @@ -131,7 +131,7 @@ if (params.enable_mod_localization) { } // LuciphorAdapter - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' { + withName: '.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' { ext.args = "-debug $params.luciphor_debug" } } @@ -139,7 +139,7 @@ if (params.enable_mod_localization) { process { // ISOBARICANALYZER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:ISOBARICANALYZER' { + withName: '.*:TMT:.*:ISOBARICANALYZER' { ext.args = "-debug $params.iso_debug" publishDir = [ path: { "${params.outdir}/isobaricanalyzer" }, @@ -150,7 +150,7 @@ process { } // IDMAPPER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:IDMAPPER' { + withName: '.*:TMT:.*:IDMAPPER' { ext.args = "-debug $params.idmapper_debug" publishDir = [ path: { "${params.outdir}/idmapper" }, @@ -161,7 +161,7 @@ process { } // IDFILTER on PROTEIN LEVEL - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEININFERENCE:IDFILTER' { + withName: '.*:TMT:PROTEININFERENCE:IDFILTER' { ext.args = [ "-score:prot \"$params.protein_level_fdr_cutoff\"", "-delete_unreferenced_peptide_hits", @@ -176,20 +176,20 @@ process { } // PROTEINQUANTIFIER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' { + withName: '.*:TMT:PROTEINQUANT:PROTEINQUANTIFIER' { ext.args = "-debug 0" } // MSSTATSCONVERTER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' { + withName: '.*:TMT:PROTEINQUANT:MSSTATSCONVERTER' { ext.args = "-debug 0" } } -if 
(params.protein_inference_method == "bayesian") { +if (params.protein_inference_method.equals("bayesian")) { process { // EPIFANY - withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' { + withName: '.*:EPIFANY' { ext.args = "-keep_best_psm_only false -debug $params.protein_inference_debug" publishDir = [ path: { "${params.outdir}/epifany" }, @@ -202,7 +202,7 @@ if (params.protein_inference_method == "bayesian") { } else { process { // PROTEININFERENCER - withName: 'NFCORE_QUANTMS:QUANTMS:.*:PROTEININFERENCER' { + withName: '.*:PROTEININFERENCER' { ext.args = "-debug $params.protein_inference_debug" publishDir = [ path: { "${params.outdir}/proteininferencer" }, @@ -217,7 +217,7 @@ if (params.protein_inference_method == "bayesian") { process { // IDFILTER - withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMFDRCONTROL:IDFILTER' { + withName: '.*:ID:PSMFDRCONTROL:IDFILTER' { ext.args = "-score:pep \"$params.psm_pep_fdr_cutoff\"" ext.suffix = '.idXML' publishDir = [ @@ -228,12 +228,12 @@ process { } // PROTEOMICSLFQ - withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' { + withName: '.*:LFQ:PROTEOMICSLFQ' { ext.args = "-debug $params.plfq_debug" } // DIA-NN - withName: 'NFCORE_QUANTMS:QUANTMS:DIA:.*' { + withName: '.*:DIA:.*' { ext.when = { !params.enable_conda } } diff --git a/conf/test_localize.config b/conf/test_localize.config index 7a64e2d2..4c92403a 100644 --- a/conf/test_localize.config +++ b/conf/test_localize.config @@ -22,9 +22,10 @@ params { outdir = "./results_localize" // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/test_phospho.sdrf' database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta' enable_mod_localization = true search_engines = 'comet,msgf' enable_qc = true + skip_post_msstats = true } diff --git 
a/docs/output.md b/docs/output.md index 1c667f94..84e10da4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -19,9 +19,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d 7. If multiple search engines were chosen, a combined FDR is calculated 8. Single run PSM/Peptide-level FDR filtering 9. If localization of modifications was requested, Luciphor2 is applied via the OpenMS adapter -10. (**DDA-LFQ**) Protein inference and labelfree quantification based on spectral counting or MS1 feature detection, alignment and integration with OpenMS' ProteomicsLFQ. Performs an additional experiment-wide FDR filter on protein (and if requested peptide/PSM-level). +10. (**DDA-LFQ**) Protein inference and label-free quantification based on spectral counting or MS1 feature detection, alignment and integration with OpenMS' ProteomicsLFQ. Performs an additional experiment-wide FDR filter on protein (and if requested peptide/PSM-level). 11. (**DDA-ISO**) Extracts and normalizes isobaric labeling -12. (**DDA-ISO**) Protein inference using the OpenMS ProteinInference tool. In addition, protein FDR filterting is performed in this step for Isobaric datasets (TMT, iTRAQ). +12. (**DDA-ISO**) Protein inference using the OpenMS ProteinInference tool. In addition, protein FDR filtering is performed in this step for Isobaric datasets (TMT, iTRAQ). 13. (**DDA-ISO**) Protein Quantification 14. Generation of QC reports using pMultiQC a library for QC proteomics data analysis. 
@@ -57,17 +57,17 @@ results - pipeline information: - [pipeline_info/...](#nextflow-pipeline-info) - DDA-LFQ quantification results: - - [proteomicslfq/out.consensusXML](#consenusxml) - - [proteomicslfq/out_msstats.csv](#msstats-ready-quantity-table) + - [proteomicslfq/out.consensusXML](#consensusxml) + - [proteomicslfq/out_msstats.csv](#msstats-ready-quantity-tables) - [proteomicslfq/out_triqler.tsv](#triqler) - [proteomicslfq/out.mzTab](#mztab) - DDA-ISO quantification results: - [proteinquantifier/out.mzTab](#mztab) - [proteinquantifier/peptide_out.csv](#tab-based-openms-formats) - [proteinquantifier/protein_out.csv](#tab-based-openms-formats) - - [msstatsconverter/out_msstats.csv](#msstats-ready-quantity-table) + - [msstatsconverter/out_msstats.csv](#msstats-ready-quantity-tables) - DIA-LFQ quantification results: - - [convert2msstats/out_msstats.csv](#msstats-ready-quantity-table) + - [convert2msstats/out_msstats.csv](#msstats-ready-quantity-tables) - MSstats-processed results - [msstats/out_msstats.mzTab](#msstats-processed-mztab) @@ -86,6 +86,17 @@ results ### File types +#### Spectra + +The main spectra format of quantms is the open [mzML](https://www.psidev.info/mzML) format. However, it also supports Thermo raw files through conversion with +ThermoRawFileParser. Mixed inputs should be possible but are untested. Conversion results can be cached if run locally or written to the results folder. +Mismatches between file extensions in the design and on disk can be corrected through parameters. + +#### Protein database + +The input protein database needs to be in standard FASTA format. We recommend removing stop codons `*` in a way that is suitable for your analysis to avoid +different handling between peptide search engines. 
+ #### Identifications Intermediate output for the PSM/peptide-level filtered identifications per raw/mzML file happens in OpenMS' @@ -93,7 +104,7 @@ internal [idXML](https://github.com/OpenMS/OpenMS/blob/develop/share/OpenMS/SCHE #### Quantities -Depending on the mode quantms reports its outputs in different folders, see [Output structure](#output-structure). +Depending on the mode, quantms reports its outputs for quantities in different folders and formats, see [Output structure](#output-structure). ##### ConsensusXML @@ -113,6 +124,11 @@ MSstats output is generated for all three pipelines DDA-LFQ, DDA-ISO and DIA-LFQ OpenMStoMSstats function of the MSstats R package. It should hold the same quantities as the consensusXML but rearranged in a "long" table format with additional information about the experimental design used by MSstats. +##### Triqler + +The output intended as input for Triqler is a tsv file containing information similar to the output for MSstats. Additionally, it contains quantities for +decoy identifications and search engine scores. + #### mzTab The mzTab is exported for all three workflows DDA-LFQ, DDA-ISO and DIA-LFQ. 
It is a complete [mzTab](https://github.com/HUPO-PSI/mzTab) file diff --git a/docs/usage.md b/docs/usage.md index d8dc65a2..95b7774e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,7 +11,7 @@ The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/quantms --input '/url/path/to/your/experimentX_design.tsv' --database '/url/path/to/your/proteindatabase.fasta' -profile docker +nextflow run nf-core/quantms --input '/url/path/to/your/experimentX_design.tsv' --database '/url/path/to/your/proteindatabase.fasta' --outdir './results' -profile docker ``` where the experimental design file has to be one of: diff --git a/nextflow.config b/nextflow.config index b2dea632..dc226019 100644 --- a/nextflow.config +++ b/nextflow.config @@ -203,9 +203,6 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" @@ -275,6 +272,10 @@ profiles { test_dia { includeConfig 'conf/test_dia.config' } } +// Load module config after profile, so they can depend on overwritten input parameters specific for each profile. +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
diff --git a/subworkflows/local/create_input_channel.nf b/subworkflows/local/create_input_channel.nf index 7fccb3c4..a179a119 100644 --- a/subworkflows/local/create_input_channel.nf +++ b/subworkflows/local/create_input_channel.nf @@ -92,7 +92,7 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { // for sdrf read from config file, without it, read from params if (is_sdrf.toString().toLowerCase().contains("false")) { - log.warn " NO SDRF!!!" + log.info "No SDRF given. Using parameters to determine tolerance, enzyme, mod. and labelling settings" meta.labelling_type = params.labelling_type meta.dissociationmethod = params.fragment_method meta.fixedmodifications = params.fixed_mods @@ -131,10 +131,8 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { exit 1 } } - if (meta.acquisition_method == "dia") { - log.warn "Acquisition Method: '${meta.acquisition_method}'" - } else { - log.warn "Label: '${meta.labelling_type}'" + // Nothing to determine for dia. Only LFQ allowed there. + if (!meta.acquisition_method.equals("dia")) { if (wrapper.labelling_type.equals("")) { if (meta.labelling_type.contains("tmt") || meta.labelling_type.contains("itraq") || meta.labelling_type.contains("label free")) { wrapper.labelling_type = meta.labelling_type @@ -144,7 +142,7 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { } } else { if (meta.labelling_type != wrapper.labelling_type) { - log.error "Only one label type supported: was '${wrapper.labelling_type}', now is '${meta.labelling_type}'." + log.error "Currently, only one label type per design is supported: was '${wrapper.labelling_type}', now is '${meta.labelling_type}'." exit 1 } }