Merge pull request #155 from ypriverol/dev

skip msstats tests in PTM tests
nf-core · Apr 21, 2022 · 63dd266 · 63dd266
2 parents 24aa165 + 2757f64
commit 63dd266
Show file tree

Hide file tree

Showing 7 changed files with 55 additions and 38 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -32,6 +32,7 @@ The pipeline is using Nextflow DSL2, each process will be run with its own [Bioc
 | `percolator`     | 3.5        |
 | `pmultiqc`       | 0.0.10     |
 | `luciphor`       | 2020_04_03 |
-| `dia-nn`         | 1.8        |
+| `dia-nn`         | 1.8.1      |
+| `msstats`        | 4.2.0      |
 
 ### `Deprecated`
diff --git a/conf/modules.config b/conf/modules.config
@@ -41,7 +41,7 @@ process {
 // OPENMSPEAKPICKER
 if (params.openms_peakpicking){
     process {
-            withName: 'NFCORE_QUANTMS:QUANTMS:.*:OPENMSPEAKPICKER' {
+            withName: '.*:OPENMSPEAKPICKER' {
             publishDir  = [
                 path: { "${params.outdir}/openmspeakpicker" },
                 mode: params.publish_dir_mode,
@@ -55,7 +55,7 @@ if (params.openms_peakpicking){
 // DECOYDATABASE
 if (params.add_decoys) {
     process {
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:DECOYDATABASE' {
+        withName: '.*:DECOYDATABASE' {
             publishDir  = [
                 path: { "${params.outdir}/decoydatabase" },
                 mode: params.publish_dir_mode,
@@ -69,7 +69,7 @@ if (params.add_decoys) {
 if (params.posterior_probabilities == "percolator") {
     process {
         // EXTRACTPSMFEATURE
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
+        withName: '.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
             publishDir  = [
                 path: { "${params.outdir}/extractpsmfeature" },
                 mode: params.publish_dir_mode,
@@ -79,14 +79,14 @@ if (params.posterior_probabilities == "percolator") {
         }
 
         //PERCOLATOR
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:PERCOLATOR' {
+        withName: '.*:ID:PSMRESCORING:PERCOLATOR' {
             ext.args    = "-debug $params.percolator_debug"
         }
     }
 } else {
     process {
         // IDPEP
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:IDPEP' {
+        withName: '.*:ID:PSMRESCORING:IDPEP' {
             ext.args    = "-debug $params.idpep_debug"
         }
     }
@@ -96,7 +96,7 @@ if (params.search_engines.split(",").size() == 1) {
     process {
 
         // IDSCORESWITCHER
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMFDRCONTROL:IDSCORESWITCHER' {
+        withName: '.*:ID:PSMFDRCONTROL:IDSCORESWITCHER' {
             ext.args    = [
                 "-new_score_orientation lower_better",
                 "-old_score \"Posterior Error Probability\"",
@@ -116,7 +116,7 @@ if (params.enable_mod_localization) {
     process {
 
         // IDSCORESWITCHERFORLUCIPHOR
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:IDSCORESWITCHERFORLUCIPHOR' {
+        withName: '.*:ID:PHOSPHOSCORING:IDSCORESWITCHERFORLUCIPHOR' {
             ext.args    = [
                 "-new_score_orientation lower_better",
                 "-old_score \"q-value\"",
@@ -131,15 +131,15 @@ if (params.enable_mod_localization) {
         }
 
         // LuciphorAdapter
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' {
+        withName: '.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' {
             ext.args    = "-debug $params.luciphor_debug"
         }
     }
 }
 
 process {
     // ISOBARICANALYZER
-    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:ISOBARICANALYZER' {
+    withName: '.*:TMT:.*:ISOBARICANALYZER' {
         ext.args    = "-debug $params.iso_debug"
         publishDir  = [
             path: { "${params.outdir}/isobaricanalyzer" },
@@ -150,7 +150,7 @@ process {
     }
 
     // IDMAPPER
-    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:IDMAPPER' {
+    withName: '.*:TMT:.*:IDMAPPER' {
         ext.args    = "-debug $params.idmapper_debug"
         publishDir  = [
             path: { "${params.outdir}/idmapper" },
@@ -161,7 +161,7 @@ process {
     }
 
     // IDFILTER on PROTEIN LEVEL
-    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEININFERENCE:IDFILTER' {
+    withName: '.*:TMT:PROTEININFERENCE:IDFILTER' {
         ext.args    = [
             "-score:prot \"$params.protein_level_fdr_cutoff\"",
             "-delete_unreferenced_peptide_hits",
@@ -176,20 +176,20 @@ process {
     }
 
     // PROTEINQUANTIFIER
-    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' {
+    withName: '.*:TMT:PROTEINQUANT:PROTEINQUANTIFIER' {
         ext.args    = "-debug 0"
     }
 
     // MSSTATSCONVERTER
-    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' {
+    withName: '.*:TMT:PROTEINQUANT:MSSTATSCONVERTER' {
         ext.args    = "-debug 0"
     }
 }
 
-if (params.protein_inference_method == "bayesian") {
+if (params.protein_inference_method.equals("bayesian")) {
     process {
         // EPIFANY
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' {
+        withName: '.*:EPIFANY' {
             ext.args    = "-keep_best_psm_only false -debug $params.protein_inference_debug"
             publishDir  = [
                 path: { "${params.outdir}/epifany" },
@@ -202,7 +202,7 @@ if (params.protein_inference_method == "bayesian") {
 } else {
     process {
         // PROTEININFERENCER
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:PROTEININFERENCER' {
+        withName: '.*:PROTEININFERENCER' {
             ext.args    = "-debug $params.protein_inference_debug"
             publishDir  = [
                 path: { "${params.outdir}/proteininferencer" },
@@ -217,7 +217,7 @@ if (params.protein_inference_method == "bayesian") {
 process {
 
     // IDFILTER
-    withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMFDRCONTROL:IDFILTER' {
+    withName: '.*:ID:PSMFDRCONTROL:IDFILTER' {
         ext.args    = "-score:pep \"$params.psm_pep_fdr_cutoff\""
         ext.suffix  = '.idXML'
         publishDir  = [
@@ -228,12 +228,12 @@ process {
     }
 
     // PROTEOMICSLFQ
-    withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' {
+    withName: '.*:LFQ:PROTEOMICSLFQ' {
         ext.args    = "-debug $params.plfq_debug"
     }
 
     // DIA-NN
-    withName: 'NFCORE_QUANTMS:QUANTMS:DIA:.*' {
+    withName: '.*:DIA:.*' {
         ext.when = { !params.enable_conda }
     }
 

diff --git a/conf/test_localize.config b/conf/test_localize.config
@@ -22,9 +22,10 @@ params {
     outdir = "./results_localize"
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/test_phospho.sdrf'
     database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta'
     enable_mod_localization = true
     search_engines = 'comet,msgf'
     enable_qc = true
+    skip_post_msstats = true
 }
diff --git a/docs/output.md b/docs/output.md
@@ -19,9 +19,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 7. If multiple search engines were chosen, a combined FDR is calculated
 8. Single run PSM/Peptide-level FDR filtering
 9. If localization of modifications was requested, Luciphor2 is applied via the OpenMS adapter
-10. (**DDA-LFQ**) Protein inference and labelfree quantification based on spectral counting or MS1 feature detection, alignment and integration with OpenMS' ProteomicsLFQ. Performs an additional experiment-wide FDR filter on protein (and if requested peptide/PSM-level).
+10. (**DDA-LFQ**) Protein inference and label-free quantification based on spectral counting or MS1 feature detection, alignment and integration with OpenMS' ProteomicsLFQ. Performs an additional experiment-wide FDR filter on protein (and if requested peptide/PSM-level).
 11. (**DDA-ISO**) Extracts and normalizes isobaric labeling
-12. (**DDA-ISO**) Protein inference using the OpenMS ProteinInference tool. In addition, protein FDR filterting is performed in this step for Isobaric datasets (TMT, iTRAQ).
+12. (**DDA-ISO**) Protein inference using the OpenMS ProteinInference tool. In addition, protein FDR filtering is performed in this step for Isobaric datasets (TMT, iTRAQ).
 13. (**DDA-ISO**) Protein Quantification
 14. Generation of QC reports using pMultiQC a library for QC proteomics data analysis.
 
@@ -57,17 +57,17 @@ results
 - pipeline information:
   - [pipeline_info/...](#nextflow-pipeline-info)
 - DDA-LFQ quantification results:
-  - [proteomicslfq/out.consensusXML](#consenusxml)
-  - [proteomicslfq/out_msstats.csv](#msstats-ready-quantity-table)
+  - [proteomicslfq/out.consensusXML](#consensusxml)
+  - [proteomicslfq/out_msstats.csv](#msstats-ready-quantity-tables)
   - [proteomicslfq/out_triqler.tsv](#triqler)
   - [proteomicslfq/out.mzTab](#mztab)
 - DDA-ISO quantification results:
   - [proteinquantifier/out.mzTab](#mztab)
   - [proteinquantifier/peptide_out.csv](#tab-based-openms-formats)
   - [proteinquantifier/protein_out.csv](#tab-based-openms-formats)
-  - [msstatsconverter/out_msstats.csv](#msstats-ready-quantity-table)
+  - [msstatsconverter/out_msstats.csv](#msstats-ready-quantity-tables)
 - DIA-LFQ quantification results:
-  - [convert2msstats/out_msstats.csv](#msstats-ready-quantity-table)
+  - [convert2msstats/out_msstats.csv](#msstats-ready-quantity-tables)
 - MSstats-processed results
   - [msstats/out_msstats.mzTab](#msstats-processed-mztab)
 
@@ -86,14 +86,25 @@ results
 
 ### File types
 
+#### Spectra
+
+The main format for spectra in quantms is the open [mzML](https://www.psidev.info/mzML) format. However, it also supports Thermo raw files through conversion with
+ThermoRawFileParser. Mixed inputs should be possible but are untested. Conversion results can be cached if run locally or written to the results.
+Mismatches between file extensions in the design and on disk can be corrected through parameters.
+
+#### Protein database
+
+The input protein database needs to be in standard fasta format. We recommend removing stop codons `*` in a way that is suitable to your analysis to avoid
+different handling between peptide search engines.
+
 #### Identifications
 
 Intermediate output for the PSM/peptide-level filtered identifications per raw/mzML file happens in OpenMS'
 internal [idXML](https://github.com/OpenMS/OpenMS/blob/develop/share/OpenMS/SCHEMAS/IdXML_1_5.xsd) format. Only for DDA currently.
 
 #### Quantities
 
-Depending on the mode quantms reports its outputs in different folders, see [Output structure](#output-structure).
+Depending on the mode, quantms reports its outputs for quantities in different folders and formats, see [Output structure](#output-structure).
 
 ##### ConsensusXML
 
@@ -113,6 +124,11 @@ MSstats output is generated for all three pipelines DDA-LFQ, DDA-ISO and DIA-LFQ
 OpenMStoMSstats function of the MSstats R package. It should hold the same quantities as the consensusXML but rearranged in a "long" table format with additional
 information about the experimental design used by MSstats.
 
+##### Triqler
+
+The output intended as input for Triqler contains similar information to the MSstats output, in a tsv format. Additionally, it contains quantities for
+decoy identifications and search engine scores.
+
 #### mzTab
 
 The mzTab is exported for all three workflows DDA-LFQ, DDA-ISO and DIA-LFQ. It is a complete [mzTab](https://github.com/HUPO-PSI/mzTab) file

diff --git a/docs/usage.md b/docs/usage.md
@@ -11,7 +11,7 @@
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/quantms --input '/url/path/to/your/experimentX_design.tsv' --database '/url/path/to/your/proteindatabase.fasta' -profile docker
+nextflow run nf-core/quantms --input '/url/path/to/your/experimentX_design.tsv' --database '/url/path/to/your/proteindatabase.fasta' --outdir './results' -profile docker
 ```
 
 where the experimental design file has to be one of:

diff --git a/nextflow.config b/nextflow.config
@@ -203,9 +203,6 @@ params {
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
 
-// Load modules.config for DSL2 module specific options
-includeConfig 'conf/modules.config'
-
 // Load nf-core custom profiles from different Institutions
 try {
     includeConfig "${params.custom_config_base}/nfcore_custom.config"
@@ -275,6 +272,10 @@ profiles {
     test_dia        { includeConfig 'conf/test_dia.config' }
 }
 
+// Load module config after profile, so they can depend on overwritten input parameters specific for each profile.
+// Load modules.config for DSL2 module specific options
+includeConfig 'conf/modules.config'
+
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.

diff --git a/subworkflows/local/create_input_channel.nf b/subworkflows/local/create_input_channel.nf
@@ -92,7 +92,7 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) {
 
     // for sdrf read from config file, without it, read from params
     if (is_sdrf.toString().toLowerCase().contains("false")) {
-        log.warn " NO SDRF!!!"
+        log.info "No SDRF given. Using parameters to determine tolerance, enzyme, mod. and labelling settings"
         meta.labelling_type             = params.labelling_type
         meta.dissociationmethod         = params.fragment_method
         meta.fixedmodifications         = params.fixed_mods
@@ -131,10 +131,8 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) {
             exit 1
         }
     }
-    if (meta.acquisition_method == "dia") {
-        log.warn "Acquisition Method: '${meta.acquisition_method}'"
-    } else {
-        log.warn "Label: '${meta.labelling_type}'"
+    // Nothing to determine for DIA. Only LFQ is allowed there.
+    if (!meta.acquisition_method.equals("dia")) { 
         if (wrapper.labelling_type.equals("")) {
             if (meta.labelling_type.contains("tmt") || meta.labelling_type.contains("itraq") || meta.labelling_type.contains("label free")) {
                 wrapper.labelling_type = meta.labelling_type
@@ -144,7 +142,7 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) {
             }
         } else {
             if (meta.labelling_type != wrapper.labelling_type) {
-                log.error "Only one label type supported: was '${wrapper.labelling_type}', now is '${meta.labelling_type}'."
+                log.error "Currently, only one label type per design is supported: was '${wrapper.labelling_type}', now is '${meta.labelling_type}'."
                 exit 1
             }
         }