Merge pull request #145 from daichengxin/dev

Major fix
nf-core · Mar 27, 2022 · eb8ee5d · eb8ee5d
2 parents 2935cb5 + b7603b9
commit eb8ee5d
Show file tree

Hide file tree

Showing 59 changed files with 339 additions and 173 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        test_profile: ["test", "test_lfq", "test_dia"]
+        test_profile: ["test", "test_lfq", "test_dia", "test_localize"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
@@ -8,15 +8,14 @@
 import errno
 import argparse
 from sdrf_pipelines.sdrf.sdrf import SdrfDataFrame
-from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY
+from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY, DEFAULT_TEMPLATE
 import pandas as pd
 
 def parse_args(args=None):
     Description = "Reformat nf-core/quantms sdrf file and check its contents."
-    Epilog = "Example usage: python validate_sdrf.py <template> <sdrf> <check_ms>"
+    Epilog = "Example usage: python validate_sdrf.py <sdrf> <check_ms>"
 
     parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
-    parser.add_argument("TEMPLATE", help="Input sdrf file.")
     parser.add_argument("SDRF", help="SDRF/Expdesign file to be validated")
     parser.add_argument("ISSDRF", help="SDRF file or Expdesign file")
     parser.add_argument("--CHECK_MS", help="check mass spectrometry fields in SDRF.", action="store_true")
@@ -43,9 +42,9 @@ def print_error(error, context="Line", context_str=""):
     print(error_str)
     sys.exit(1)
 
-def check_sdrf(template, check_ms, sdrf):
+def check_sdrf(check_ms, sdrf):
     df = SdrfDataFrame.parse(sdrf)
-    errors = df.validate(template)
+    errors = df.validate(DEFAULT_TEMPLATE)
     if check_ms:
         errors = errors + df.validate(MASS_SPECTROMETRY)
     print(errors)
@@ -107,7 +106,7 @@ def main(args=None):
     args = parse_args(args)
 
     if args.ISSDRF == "true" :
-        check_sdrf(args.TEMPLATE, args.CHECK_MS, args.SDRF)
+        check_sdrf(args.CHECK_MS, args.SDRF)
     else:
         check_expdesign(args.SDRF)
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -69,14 +69,26 @@ if (params.add_decoys) {
 if (params.posterior_probabilities == "percolator") {
     process {
         // EXTRACTPSMFEATURE
-        withName: 'NFCORE_QUANTMS:QUANTMS:.*:EXTRACTPSMFEATURE' {
+        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
             publishDir  = [
                 path: { "${params.outdir}/extractpsmfeature" },
                 mode: params.publish_dir_mode,
                 pattern: '*.log',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
+
+        //PERCOLATOR
+        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:PERCOLATOR' {
+            ext.args    = "-debug $params.percolator_debug"
+        }
+    }
+} else {
+    process {
+        // IDPEP
+        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:IDPEP' {
+            ext.args    = "-debug $params.idpep_debug"
+        }
     }
 }
 
@@ -88,7 +100,8 @@ if (params.search_engines.split(",").size() == 1) {
             ext.args    = [
                 "-new_score_orientation lower_better",
                 "-old_score \"Posterior Error Probability\"",
-                "-new_score_type q-value"
+                "-new_score_type q-value",
+                "-debug $params.idscoreswitcher_debug"
             ].join(' ').trim()
             publishDir  = [
                 path: { "${params.outdir}/idscoreswitcher" },
@@ -107,20 +120,27 @@ if (params.enable_mod_localization) {
             ext.args    = [
                 "-new_score_orientation lower_better",
                 "-old_score \"q-value\"",
-                "-new_score_type Posterior Error Probability"
+                "-new_score_type \"Posterior Error Probability\"",
+                "-debug $params.idscoreswitcher_debug"
             ].join(' ').trim()
             publishDir  = [
                 path: { "${params.outdir}/idscoreswitcherforluciphor" },
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
+
+        // LuciphorAdapter
+        withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' {
+            ext.args    = "-debug $params.luciphor_debug"
+        }
     }
 }
 
 process {
     // ISOBARICANALYZER
     withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:ISOBARICANALYZER' {
+        ext.args    = "-debug $params.iso_debug"
         publishDir  = [
             path: { "${params.outdir}/isobaricanalyzer" },
             mode: params.publish_dir_mode,
@@ -131,6 +151,7 @@ process {
 
     // IDMAPPER
     withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:IDMAPPER' {
+        ext.args    = "-debug $params.idmapper_debug"
         publishDir  = [
             path: { "${params.outdir}/idmapper" },
             mode: params.publish_dir_mode,
@@ -153,12 +174,28 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    //IDCONFLICTRESOLVER
+    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:IDCONFLICTRESOLVER' {
+        ext.args    = "-debug $params.conflict_resolver_debug"
+    }
+
+    //PROTEINQUANTIFIER
+    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' {
+        ext.args    = "-debug $params.proteinquant_debug"
+    }
+
+    // MSstatsConverter
+    withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' {
+        ext.args    = "-debug $params.msstatsconverter_debug"
+    }
 }
 
 if (params.protein_inference_bayesian) {
     process {
         // EPIFANY
         withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' {
+            ext.args    = "-debug $params.protein_inference_debug"
             publishDir  = [
                 path: { "${params.outdir}/epifany" },
                 mode: params.publish_dir_mode,
@@ -171,6 +208,7 @@ if (params.protein_inference_bayesian) {
     process {
         // PROTEININFERENCER
         withName: 'NFCORE_QUANTMS:QUANTMS:.*:PROTEININFERENCER' {
+            ext.args    = "-debug $params.protein_inference_debug"
             publishDir  = [
                 path: { "${params.outdir}/proteininferencer" },
                 mode: params.publish_dir_mode,
@@ -204,11 +242,14 @@ process {
         ]
     }
 
-    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
-        publishDir = [
-            path: { "${params.outdir}/pipeline_info" },
-            mode: 'copy',
-            pattern: '*_versions.yml'
-        ]
+    // PROTEOMICSLFQ
+    withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' {
+        ext.args    = "-debug $params.inf_quant_debug"
     }
+
+    // DIA-NN
+    withName: 'NFCORE_QUANTMS:QUANTMS:DIA:.*' {
+        ext.when = { !params.enable_conda }
+    }
+
 }
diff --git a/conf/test.config b/conf/test.config
@@ -20,7 +20,7 @@ params {
     max_time   = '6.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'
 
     database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'
     posterior_probabilities = "percolator"

diff --git a/conf/test_dia.config b/conf/test_dia.config
@@ -16,7 +16,7 @@ params {
     max_time = 48.h
 
     // Input data
-    input = 'https://raw.githubusercontent.com/bigbio/quantms/dev/assets/PXD026600.sdrf.tsv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/PXD026600/PXD026600.sdrf.tsv'
     database = 'ftp://massive.ucsd.edu/MSV000087597/sequence/REF_EColi_K12_UPS1_combined.fasta'
     min_pr_mz = 350
     max_pr_mz = 950

diff --git a/conf/test_full.config b/conf/test_full.config
@@ -15,7 +15,7 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function and sanity of results'
 
     // Input data for full size test
-    input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'
 
     quant_method = 'ISO'
     database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'

diff --git a/conf/test_lfq.config b/conf/test_lfq.config
@@ -17,8 +17,8 @@ params {
 
     // Input data
     labelling_type = "label free sample"
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/BSA_design_urls.tsv'
-    database = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/BSA_design_urls.tsv'
+    database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
     posterior_probabilities = "fit_distributions"
     search_engines = "msgf"
     decoy_string= "rev"

diff --git a/conf/test_localize.config b/conf/test_localize.config
@@ -0,0 +1,26 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests with
+ *  modification localization
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/quantms -profile test_localize,<docker/singularity/podman>
+ */
+
+params {
+    config_profile_name = 'Test phospho-localization profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function for phospho-localization, SDRF parsing and ConsensusID.'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = 6.GB
+    max_time = 1.h
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf'
+    database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta'
+    enable_mod_localization = true
+    search_engines = 'comet,msgf'
+    enable_qc = true
+}
diff --git a/modules/local/convert2msstats/meta.yml b/modules/local/convert2msstats/meta.yml
@@ -14,9 +14,11 @@ input:
   - report:
       type: file
       description: DIA-NN main report file
+      pattern: "*.tsv"
   - exp_design:
       type: file
       description: An experimental design file including the Sample and replicate columns, among others.
+      pattern: "*.tsv"
 output:
   - out_msstats:
       type: file
@@ -27,4 +29,4 @@ output:
       description: File containing software version
       pattern: "*.{version.txt}"
 authors:
-  - "@Chengxin Dai"
+  - "@daichengxin"
diff --git a/modules/local/diannsearch/main.nf b/modules/local/diannsearch/main.nf
@@ -1,11 +1,6 @@
 process DIANNSEARCH {
     label 'process_high'
 
-    if (params.enable_conda) {
-        exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
-    }
-
-    //singularity image ?
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
         'biocontainers/diann:v1.8.0_cv1' }"
@@ -23,6 +18,9 @@ process DIANNSEARCH {
     path "versions.yml", emit: version
     path "*.tsv"
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def args = task.ext.args ?: ''
     mbr = params.targeted_only ? "" : "--reanalyse"

diff --git a/modules/local/diannsearch/meta.yml b/modules/local/diannsearch/meta.yml
@@ -14,15 +14,18 @@ input:
   - lib:
       type: file
       description: Spectra library file
+      pattern: "*.tsv"
   - spectra:
       type: dir
       description: The directory for spectra files
   - searchdb:
       type: file
       description: Fasta sequence file
+      pattern: "*.{fasta,fa}"
   - cfg:
       type: dir
       description: Specifies a file to load options/commands from.
+      pattern: "*.cfg"
 output:
   - report:
       type: file
@@ -41,4 +44,4 @@ output:
       description: File containing software version
       pattern: "*.{version.txt}"
 authors:
-  - "@Chengxin Dai"
+  - "@daichengxin"
diff --git a/modules/local/generate_diann_cfg/meta.yml b/modules/local/generate_diann_cfg/meta.yml
@@ -31,4 +31,4 @@ output:
       description: log file
       pattern: "*.log"
 authors:
-  - "@Chengxin Dai"
+  - "@daichengxin"
diff --git a/modules/local/librarygeneration/main.nf b/modules/local/librarygeneration/main.nf
@@ -1,11 +1,6 @@
 process LIBRARYGENERATION {
     label 'process_high'
 
-    if (params.enable_conda) {
-        exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
-    }
-
-    //singularity image ?
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
         'biocontainers/diann:v1.8.0_cv1' }"
@@ -21,6 +16,9 @@ process LIBRARYGENERATION {
     path "*.tsv.speclib", emit: speclib
     path "*.predicted.speclib", emit: predict_speclib
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def args = task.ext.args ?: ''
 

diff --git a/modules/local/librarygeneration/meta.yml b/modules/local/librarygeneration/meta.yml
@@ -14,12 +14,15 @@ input:
   - spectra:
       type: file
       description: Spectra file
+      pattern: "*.mzML"
   - fasta:
       type: file
       description: FASTA sequence databases
+      pattern: "*.{fasta,fa}"
   - cfg:
       type: file
       description: specifies a configuration file to load options/commands from.
+      pattern: "*.cfg"
 output:
   - lib_splib:
       type: file

diff --git a/modules/local/openms/consensusid/main.nf b/modules/local/openms/consensusid/main.nf
@@ -29,7 +29,7 @@ process CONSENSUSID {
         -algorithm $params.consensusid_algorithm \\
         -filter:min_support $params.min_consensus_support \\
         -filter:considered_hits $params.consensusid_considered_top_hits \\
-        -debug 100 \\
+        -debug $params.consensusid_debug \\
         $args \\
         > ${meta.id}_consensusID.log
 

diff --git a/modules/local/openms/consensusid/meta.yml b/modules/local/openms/consensusid/meta.yml
@@ -17,7 +17,8 @@ input:
   - id_file:
       type: file
       description: |
-        Identifications from searching a target-decoy database.
+        Putative peptide-spectrum matches in idXML format (potentially multiple per spectrum). Score needs to be comparable (e.g. probability)
+      pattern: "*.idXML"
 output:
   - meta:
       type: map
@@ -26,6 +27,7 @@ output:
       type: file
       description: |
         Identifications with annotated FDR.
+      pattern: "*.idXML"
   - log:
       type: file
       description: log file

diff --git a/modules/local/openms/decoydatabase/main.nf b/modules/local/openms/decoydatabase/main.nf
@@ -25,7 +25,7 @@ process DECOYDATABASE {
         -method $params.decoy_method \\
         -shuffle_max_attempts $params.shuffle_max_attempts \\
         -shuffle_sequence_identity_threshold $params.shuffle_sequence_identity_threshold \\
-        -debug 100 \\
+        -debug $params.decoydatabase_debug \\
         $args \\
         > ${db_for_decoy.baseName}_decoy_database.log