Skip to content
This repository has been archived by the owner on May 6, 2024. It is now read-only.

Commit

Permalink
Merge pull request #145 from daichengxin/dev
Browse files Browse the repository at this point in the history
Major fix
  • Loading branch information
jpfeuffer committed Mar 27, 2022
2 parents 2935cb5 + b7603b9 commit eb8ee5d
Show file tree
Hide file tree
Showing 59 changed files with 339 additions and 173 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
test_profile: ["test", "test_lfq", "test_dia"]
test_profile: ["test", "test_lfq", "test_dia", "test_localize"]
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
Expand Down
11 changes: 5 additions & 6 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@
import errno
import argparse
from sdrf_pipelines.sdrf.sdrf import SdrfDataFrame
from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY
from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY, DEFAULT_TEMPLATE
import pandas as pd

def parse_args(args=None):
Description = "Reformat nf-core/quantms sdrf file and check its contents."
Epilog = "Example usage: python validate_sdrf.py <template> <sdrf> <check_ms>"
Epilog = "Example usage: python validate_sdrf.py <sdrf> <check_ms>"

parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument("TEMPLATE", help="Input sdrf file.")
parser.add_argument("SDRF", help="SDRF/Expdesign file to be validated")
parser.add_argument("ISSDRF", help="SDRF file or Expdesign file")
parser.add_argument("--CHECK_MS", help="check mass spectrometry fields in SDRF.", action="store_true")
Expand All @@ -43,9 +42,9 @@ def print_error(error, context="Line", context_str=""):
print(error_str)
sys.exit(1)

def check_sdrf(template, check_ms, sdrf):
def check_sdrf(check_ms, sdrf):
df = SdrfDataFrame.parse(sdrf)
errors = df.validate(template)
errors = df.validate(DEFAULT_TEMPLATE)
if check_ms:
errors = errors + df.validate(MASS_SPECTROMETRY)
print(errors)
Expand Down Expand Up @@ -107,7 +106,7 @@ def main(args=None):
args = parse_args(args)

if args.ISSDRF == "true" :
check_sdrf(args.TEMPLATE, args.CHECK_MS, args.SDRF)
check_sdrf(args.CHECK_MS, args.SDRF)
else:
check_expdesign(args.SDRF)

Expand Down
59 changes: 50 additions & 9 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,26 @@ if (params.add_decoys) {
if (params.posterior_probabilities == "percolator") {
process {
// EXTRACTPSMFEATURE
withName: 'NFCORE_QUANTMS:QUANTMS:.*:EXTRACTPSMFEATURE' {
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
publishDir = [
path: { "${params.outdir}/extractpsmfeature" },
mode: params.publish_dir_mode,
pattern: '*.log',
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

//PERCOLATOR
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:PERCOLATOR' {
ext.args = "-debug $params.percolator_debug"
}
}
} else {
process {
// IDPEP
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:IDPEP' {
ext.args = "-debug $params.idpep_debug"
}
}
}

Expand All @@ -88,7 +100,8 @@ if (params.search_engines.split(",").size() == 1) {
ext.args = [
"-new_score_orientation lower_better",
"-old_score \"Posterior Error Probability\"",
"-new_score_type q-value"
"-new_score_type q-value",
"-debug $params.idscoreswitcher_debug"
].join(' ').trim()
publishDir = [
path: { "${params.outdir}/idscoreswitcher" },
Expand All @@ -107,20 +120,27 @@ if (params.enable_mod_localization) {
ext.args = [
"-new_score_orientation lower_better",
"-old_score \"q-value\"",
"-new_score_type Posterior Error Probability"
"-new_score_type \"Posterior Error Probability\"",
"-debug $params.idscoreswitcher_debug"
].join(' ').trim()
publishDir = [
path: { "${params.outdir}/idscoreswitcherforluciphor" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// LuciphorAdapter
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' {
ext.args = "-debug $params.luciphor_debug"
}
}
}

process {
// ISOBARICANALYZER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:ISOBARICANALYZER' {
ext.args = "-debug $params.iso_debug"
publishDir = [
path: { "${params.outdir}/isobaricanalyzer" },
mode: params.publish_dir_mode,
Expand All @@ -131,6 +151,7 @@ process {

// IDMAPPER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:IDMAPPER' {
ext.args = "-debug $params.idmapper_debug"
publishDir = [
path: { "${params.outdir}/idmapper" },
mode: params.publish_dir_mode,
Expand All @@ -153,12 +174,28 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

//IDCONFLICTRESOLVER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:IDCONFLICTRESOLVER' {
ext.args = "-debug $params.conflict_resolver_debug"
}

//PROTEINQUANTIFIER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' {
ext.args = "-debug $params.proteinquant_debug"
}

// MSstatsConverter
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' {
ext.args = "-debug $params.msstatsconverter_debug"
}
}

if (params.protein_inference_bayesian) {
process {
// EPIFANY
withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' {
ext.args = "-debug $params.protein_inference_debug"
publishDir = [
path: { "${params.outdir}/epifany" },
mode: params.publish_dir_mode,
Expand All @@ -171,6 +208,7 @@ if (params.protein_inference_bayesian) {
process {
// PROTEININFERENCER
withName: 'NFCORE_QUANTMS:QUANTMS:.*:PROTEININFERENCER' {
ext.args = "-debug $params.protein_inference_debug"
publishDir = [
path: { "${params.outdir}/proteininferencer" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -204,11 +242,14 @@ process {
]
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: 'copy',
pattern: '*_versions.yml'
]
// PROTEOMICSLFQ
withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' {
ext.args = "-debug $params.inf_quant_debug"
}

// DIA-NN
withName: 'NFCORE_QUANTMS:QUANTMS:DIA:.*' {
ext.when = { !params.enable_conda }
}

}
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'

database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'
posterior_probabilities = "percolator"
Expand Down
2 changes: 1 addition & 1 deletion conf/test_dia.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ params {
max_time = 48.h

// Input data
input = 'https://raw.githubusercontent.com/bigbio/quantms/dev/assets/PXD026600.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/PXD026600/PXD026600.sdrf.tsv'
database = 'ftp://massive.ucsd.edu/MSV000087597/sequence/REF_EColi_K12_UPS1_combined.fasta'
min_pr_mz = 350
max_pr_mz = 950
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ params {
config_profile_description = 'Full test dataset to check pipeline function and sanity of results'

// Input data for full size test
input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'

quant_method = 'ISO'
database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'
Expand Down
4 changes: 2 additions & 2 deletions conf/test_lfq.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ params {

// Input data
labelling_type = "label free sample"
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/BSA_design_urls.tsv'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/BSA_design_urls.tsv'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
posterior_probabilities = "fit_distributions"
search_engines = "msgf"
decoy_string= "rev"
Expand Down
26 changes: 26 additions & 0 deletions conf/test_localize.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests with
* modification localization
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/quantms -profile test_localize,<docker/singularity/podman>
*/

// Parameter overrides for the `test_localize` profile: a small run with
// modification localization switched on.
params {
config_profile_name = 'Test phospho-localization profile'
config_profile_description = 'Minimal test dataset to check pipeline function for phospho-localization, SDRF parsing and ConsensusID.'

// Limit resources so that this can run on the CI runners
// (this profile is listed in the test matrix of .github/workflows/ci.yml)
max_cpus = 2
max_memory = 6.GB
max_time = 1.h

// Input data
// NOTE(review): `input` still points at the `proteomicslfq` test-datasets path while
// `database` uses the `quantms` path -- confirm whether the SDRF should move as well.
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta'
// conf/modules.config checks this flag in the section that configures the
// luciphor score switcher and LUCIPHORADAPTER.
enable_mod_localization = true
// Comma-separated list; with two engines the single-engine branch in
// conf/modules.config (search_engines.split(",").size() == 1) is not taken.
// Presumably this is what exercises ConsensusID, per the profile description -- TODO confirm.
search_engines = 'comet,msgf'
enable_qc = true
}
4 changes: 3 additions & 1 deletion modules/local/convert2msstats/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ input:
- report:
type: file
description: DIA-NN main report file
pattern: "*.tsv"
- exp_design:
type: file
description: An experimental design file including the Sample and replicate columns, among others.
pattern: "*.tsv"
output:
- out_msstats:
type: file
Expand All @@ -27,4 +29,4 @@ output:
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@Chengxin Dai"
- "@daichengxin"
8 changes: 3 additions & 5 deletions modules/local/diannsearch/main.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
process DIANNSEARCH {
label 'process_high'

if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
}

//singularity image ?
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
'biocontainers/diann:v1.8.0_cv1' }"
Expand All @@ -23,6 +18,9 @@ process DIANNSEARCH {
path "versions.yml", emit: version
path "*.tsv"

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
mbr = params.targeted_only ? "" : "--reanalyse"
Expand Down
5 changes: 4 additions & 1 deletion modules/local/diannsearch/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@ input:
- lib:
type: file
description: Spectra library file
pattern: "*.tsv"
- spectra:
type: dir
description: The directory for spectra files
- searchdb:
type: file
description: Fasta sequence file
pattern: "*.{fasta,fa}"
- cfg:
type: dir
description: Specifies a file to load options/commands from.
pattern: "*.cfg"
output:
- report:
type: file
Expand All @@ -41,4 +44,4 @@ output:
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@Chengxin Dai"
- "@daichengxin"
2 changes: 1 addition & 1 deletion modules/local/generate_diann_cfg/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ output:
description: log file
pattern: "*.log"
authors:
- "@Chengxin Dai"
- "@daichengxin"
8 changes: 3 additions & 5 deletions modules/local/librarygeneration/main.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
process LIBRARYGENERATION {
label 'process_high'

if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
}

//singularity image ?
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
'biocontainers/diann:v1.8.0_cv1' }"
Expand All @@ -21,6 +16,9 @@ process LIBRARYGENERATION {
path "*.tsv.speclib", emit: speclib
path "*.predicted.speclib", emit: predict_speclib

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''

Expand Down
3 changes: 3 additions & 0 deletions modules/local/librarygeneration/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@ input:
- spectra:
type: file
description: Spectra file
pattern: "*.mzML"
- fasta:
type: file
description: FASTA sequence databases
pattern: "*.{fasta,fa}"
- cfg:
type: file
description: specifies a configuration file to load options/commands from.
pattern: "*.cfg"
output:
- lib_splib:
type: file
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openms/consensusid/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ process CONSENSUSID {
-algorithm $params.consensusid_algorithm \\
-filter:min_support $params.min_consensus_support \\
-filter:considered_hits $params.consensusid_considered_top_hits \\
-debug 100 \\
-debug $params.consensusid_debug \\
$args \\
> ${meta.id}_consensusID.log
Expand Down
4 changes: 3 additions & 1 deletion modules/local/openms/consensusid/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ input:
- id_file:
type: file
description: |
Identifications from searching a target-decoy database.
Putative peptide-spectrum matches in idXML format (potentially multiple per spectrum). Score needs to be comparable (e.g. probability)
pattern: "*.idXML"
output:
- meta:
type: map
Expand All @@ -26,6 +27,7 @@ output:
type: file
description: |
Identifications with annotated FDR.
pattern: "*.idXML"
- log:
type: file
description: log file
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openms/decoydatabase/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ process DECOYDATABASE {
-method $params.decoy_method \\
-shuffle_max_attempts $params.shuffle_max_attempts \\
-shuffle_sequence_identity_threshold $params.shuffle_sequence_identity_threshold \\
-debug 100 \\
-debug $params.decoydatabase_debug \\
$args \\
> ${db_for_decoy.baseName}_decoy_database.log
Expand Down

0 comments on commit eb8ee5d

Please sign in to comment.