Skip to content
This repository has been archived by the owner on May 6, 2024. It is now read-only.

update dev from bigbio #12

Merged
merged 16 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
test_profile: ["test", "test_lfq", "test_dia"]
test_profile: ["test", "test_lfq", "test_dia", "test_localize"]
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
Expand Down
11 changes: 5 additions & 6 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@
import errno
import argparse
from sdrf_pipelines.sdrf.sdrf import SdrfDataFrame
from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY
from sdrf_pipelines.sdrf.sdrf_schema import MASS_SPECTROMETRY, DEFAULT_TEMPLATE
import pandas as pd

def parse_args(args=None):
Description = "Reformat nf-core/quantms sdrf file and check its contents."
Epilog = "Example usage: python validate_sdrf.py <template> <sdrf> <check_ms>"
Epilog = "Example usage: python validate_sdrf.py <sdrf> <check_ms>"

parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument("TEMPLATE", help="Input sdrf file.")
parser.add_argument("SDRF", help="SDRF/Expdesign file to be validated")
parser.add_argument("ISSDRF", help="SDRF file or Expdesign file")
parser.add_argument("--CHECK_MS", help="check mass spectrometry fields in SDRF.", action="store_true")
Expand All @@ -43,9 +42,9 @@ def print_error(error, context="Line", context_str=""):
print(error_str)
sys.exit(1)

def check_sdrf(template, check_ms, sdrf):
def check_sdrf(check_ms, sdrf):
df = SdrfDataFrame.parse(sdrf)
errors = df.validate(template)
errors = df.validate(DEFAULT_TEMPLATE)
if check_ms:
errors = errors + df.validate(MASS_SPECTROMETRY)
print(errors)
Expand Down Expand Up @@ -107,7 +106,7 @@ def main(args=None):
args = parse_args(args)

if args.ISSDRF == "true" :
check_sdrf(args.TEMPLATE, args.CHECK_MS, args.SDRF)
check_sdrf(args.CHECK_MS, args.SDRF)
else:
check_expdesign(args.SDRF)

Expand Down
59 changes: 50 additions & 9 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,26 @@ if (params.add_decoys) {
if (params.posterior_probabilities == "percolator") {
process {
// EXTRACTPSMFEATURE
withName: 'NFCORE_QUANTMS:QUANTMS:.*:EXTRACTPSMFEATURE' {
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
publishDir = [
path: { "${params.outdir}/extractpsmfeature" },
mode: params.publish_dir_mode,
pattern: '*.log',
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

//PERCOLATOR
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:PERCOLATOR' {
ext.args = "-debug $params.percolator_debug"
}
}
} else {
process {
// IDPEP
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMRESCORING:IDPEP' {
ext.args = "-debug $params.idpep_debug"
}
}
}

Expand All @@ -88,7 +100,8 @@ if (params.search_engines.split(",").size() == 1) {
ext.args = [
"-new_score_orientation lower_better",
"-old_score \"Posterior Error Probability\"",
"-new_score_type q-value"
"-new_score_type q-value",
"-debug $params.idscoreswitcher_debug"
].join(' ').trim()
publishDir = [
path: { "${params.outdir}/idscoreswitcher" },
Expand All @@ -107,20 +120,27 @@ if (params.enable_mod_localization) {
ext.args = [
"-new_score_orientation lower_better",
"-old_score \"q-value\"",
"-new_score_type Posterior Error Probability"
"-new_score_type \"Posterior Error Probability\"",
"-debug $params.idscoreswitcher_debug"
].join(' ').trim()
publishDir = [
path: { "${params.outdir}/idscoreswitcherforluciphor" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// LuciphorAdapter
withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PHOSPHOSCORING:LUCIPHORADAPTER' {
ext.args = "-debug $params.luciphor_debug"
}
}
}

process {
// ISOBARICANALYZER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:ISOBARICANALYZER' {
ext.args = "-debug $params.iso_debug"
publishDir = [
path: { "${params.outdir}/isobaricanalyzer" },
mode: params.publish_dir_mode,
Expand All @@ -131,6 +151,7 @@ process {

// IDMAPPER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:IDMAPPER' {
ext.args = "-debug $params.idmapper_debug"
publishDir = [
path: { "${params.outdir}/idmapper" },
mode: params.publish_dir_mode,
Expand All @@ -153,12 +174,28 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

//IDCONFLICTRESOLVER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:IDCONFLICTRESOLVER' {
ext.args = "-debug $params.conflict_resolver_debug"
}

//PROTEINQUANTIFIER
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' {
ext.args = "-debug $params.proteinquant_debug"
}

// MSstatsConverter
withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' {
ext.args = "-debug $params.msstatsconverter_debug"
}
}

if (params.protein_inference_bayesian) {
process {
// EPIFANY
withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' {
ext.args = "-debug $params.protein_inference_debug"
publishDir = [
path: { "${params.outdir}/epifany" },
mode: params.publish_dir_mode,
Expand All @@ -171,6 +208,7 @@ if (params.protein_inference_bayesian) {
process {
// PROTEININFERENCER
withName: 'NFCORE_QUANTMS:QUANTMS:.*:PROTEININFERENCER' {
ext.args = "-debug $params.protein_inference_debug"
publishDir = [
path: { "${params.outdir}/proteininferencer" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -204,11 +242,14 @@ process {
]
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: 'copy',
pattern: '*_versions.yml'
]
// PROTEOMICSLFQ
withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' {
ext.args = "-debug $params.inf_quant_debug"
}

// DIA-NN
withName: 'NFCORE_QUANTMS:QUANTMS:DIA:.*' {
ext.when = { !params.enable_conda }
}

}
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'

database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'
posterior_probabilities = "percolator"
Expand Down
2 changes: 1 addition & 1 deletion conf/test_dia.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ params {
max_time = 48.h

// Input data
input = 'https://raw.githubusercontent.com/bigbio/quantms/dev/assets/PXD026600.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/PXD026600/PXD026600.sdrf.tsv'
database = 'ftp://massive.ucsd.edu/MSV000087597/sequence/REF_EColi_K12_UPS1_combined.fasta'
min_pr_mz = 350
max_pr_mz = 950
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ params {
config_profile_description = 'Full test dataset to check pipeline function and sanity of results'

// Input data for full size test
input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD000001.sdrf.tsv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'

quant_method = 'ISO'
database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta'
Expand Down
4 changes: 2 additions & 2 deletions conf/test_lfq.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ params {

// Input data
labelling_type = "label free sample"
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/BSA_design_urls.tsv'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/BSA_design_urls.tsv'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/18Protein_SoCe_Tr_detergents_trace_target_decoy.fasta'
posterior_probabilities = "fit_distributions"
search_engines = "msgf"
decoy_string= "rev"
Expand Down
26 changes: 26 additions & 0 deletions conf/test_localize.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests with
* modification localization
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/quantms -profile test_localize,<docker/singularity/podman>
*/

// Parameters set by the `test_localize` profile (selected with `-profile test_localize`).
params {
// Human-readable name and description of this test profile
config_profile_name = 'Test phospho-localization profile'
config_profile_description = 'Minimal test dataset to check pipeline function for phospho-localization, SDRF parsing and ConsensusID.'

// Limit resources so that this can run on the GitHub Actions CI runners
max_cpus = 2
max_memory = 6.GB
max_time = 1.h

// Input data
// NOTE(review): the SDRF is fetched from the `proteomicslfq` test-datasets branch while the
// database comes from the `quantms` branch -- confirm the mixed sources are intended.
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf'
database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta'
// Switch on modification localization, the feature this profile exists to exercise
enable_mod_localization = true
// Comma-separated engine list; two engines are requested here (the profile description mentions ConsensusID)
search_engines = 'comet,msgf'
enable_qc = true
}
4 changes: 3 additions & 1 deletion modules/local/convert2msstats/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ input:
- report:
type: file
description: DIA-NN main report file
pattern: "*.tsv"
- exp_design:
type: file
description: An experimental design file including Sample and replicate columns, among others.
pattern: "*.tsv"
output:
- out_msstats:
type: file
Expand All @@ -27,4 +29,4 @@ output:
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@Chengxin Dai"
- "@daichengxin"
8 changes: 3 additions & 5 deletions modules/local/diannsearch/main.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
process DIANNSEARCH {
label 'process_high'

if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
}

//singularity image ?
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
'biocontainers/diann:v1.8.0_cv1' }"
Expand All @@ -23,6 +18,9 @@ process DIANNSEARCH {
path "versions.yml", emit: version
path "*.tsv"

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
mbr = params.targeted_only ? "" : "--reanalyse"
Expand Down
5 changes: 4 additions & 1 deletion modules/local/diannsearch/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@ input:
- lib:
type: file
description: Spectra library file
pattern: "*.tsv"
- spectra:
type: dir
description: The directory for spectra files
- searchdb:
type: file
description: Fasta sequence file
pattern: "*.{fasta,fa}"
- cfg:
type: file
description: Specifies a file to load options/commands from.
pattern: "*.cfg"
output:
- report:
type: file
Expand All @@ -41,4 +44,4 @@ output:
description: File containing software version
pattern: "versions.yml"
authors:
- "@Chengxin Dai"
- "@daichengxin"
2 changes: 1 addition & 1 deletion modules/local/generate_diann_cfg/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ output:
description: log file
pattern: "*.log"
authors:
- "@Chengxin Dai"
- "@daichengxin"
8 changes: 3 additions & 5 deletions modules/local/librarygeneration/main.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
process LIBRARYGENERATION {
label 'process_high'

if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the DIA-NN tool. Please use docker or singularity containers"
}

//singularity image ?
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.0_cv1/diann_v1.8.0_cv1.img' :
'biocontainers/diann:v1.8.0_cv1' }"
Expand All @@ -21,6 +16,9 @@ process LIBRARYGENERATION {
path "*.tsv.speclib", emit: speclib
path "*.predicted.speclib", emit: predict_speclib

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''

Expand Down
3 changes: 3 additions & 0 deletions modules/local/librarygeneration/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@ input:
- spectra:
type: file
description: Spectra file
pattern: "*.mzML"
- fasta:
type: file
description: FASTA sequence databases
pattern: "*.{fasta,fa}"
- cfg:
type: file
description: specifies a configuration file to load options/commands from.
pattern: "*.cfg"
output:
- lib_splib:
type: file
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openms/consensusid/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ process CONSENSUSID {
-algorithm $params.consensusid_algorithm \\
-filter:min_support $params.min_consensus_support \\
-filter:considered_hits $params.consensusid_considered_top_hits \\
-debug 100 \\
-debug $params.consensusid_debug \\
$args \\
> ${meta.id}_consensusID.log

Expand Down
4 changes: 3 additions & 1 deletion modules/local/openms/consensusid/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ input:
- id_file:
type: file
description: |
Identifications from searching a target-decoy database.
Putative peptide-spectrum matches in idXML format (potentially multiple per spectrum). Scores need to be comparable (e.g. probabilities).
pattern: "*.idXML"
output:
- meta:
type: map
Expand All @@ -26,6 +27,7 @@ output:
type: file
description: |
Identifications with annotated FDR.
pattern: "*.idXML"
- log:
type: file
description: log file
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openms/decoydatabase/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ process DECOYDATABASE {
-method $params.decoy_method \\
-shuffle_max_attempts $params.shuffle_max_attempts \\
-shuffle_sequence_identity_threshold $params.shuffle_sequence_identity_threshold \\
-debug 100 \\
-debug $params.decoydatabase_debug \\
$args \\
> ${db_for_decoy.baseName}_decoy_database.log

Expand Down