Skip to content

Commit

Permalink
Add: Config for sparcc workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
dileep-kishore committed May 18, 2021
1 parent d8a6b79 commit 546e102
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 21 deletions.
1 change: 1 addition & 0 deletions micone/pipelines/envs/micone-sparcc/env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ channels:
dependencies:
- python=3.8
- fastspar
- parallel
- pip>=20.2.4
- pip:
- pandas>=1.2.1
21 changes: 12 additions & 9 deletions micone/pipelines/modules/network_inference/correlation/sparcc.nf
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
process sparcc {
label 'sparcc'
tag "${id}"
publishDir "${params.output_dir}/${task.process}/${id}", mode: 'copy', overwrite: true
label 'micone'
tag "${meta.id}"
publishDir "${params.output_dir}/${f[0]}/${f[1]}/${f[2]}/${meta.id}",
mode: 'copy',
overwrite: true
input:
// tuple val(id), val(datatuple), val(level), file(otu_file)
tuple val(id), file(otu_file), file(sample_metadata)
tuple val(meta), file(otu_file), file(bootstrap_files), file(obsmeta_file), file(samplemeta_file), file(children_file)
output:
tuple val(id), file('*_corr.tsv')
when:
'sparcc' in params.ni_tools
tuple val(meta), file(otu_file), file('*_corr.tsv'), file('*_corr.boot'), file(obsmeta_file), file(samplemeta_file), file(children_file)
script:
template 'network_inference/correlation/sparcc.sh'
String task_process = "${task.process}"
f = getHierarchy(task_process)
ncpus = params.network_inference.correlation['sparcc']['ncpus']
iterations = params.network_inference.correlation['sparcc']['iterations']
template 'network_inference/correlation/sparcc.py'
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ include { pvalues } from './../bootstrap/pvalue.nf'

workflow sparcc_workflow {
take:
// tuple val(id), file(otu_table)
otu_table_channel
// tuple val(meta), file(otu_file), file(obs_metadata), file(sample_metadata), file(children_map)
input_channel
main:
otu_table_channel | sparcc
// TODO: Maybe include an if statement for the resampling
otu_table_channel | resample
pvalues(sparcc.out, resample.out)
input_channel \
| resample \
| sparcc \
| pvalues
emit:
// has `publishDir` -> ${params.output_dir}/${task.process}/${id}
// sparcc and pvalues have publishDir
// tuple val(meta), file(corr_file), file(pvalue_file), file(obs_metadata), file(sample_metadata), file(children_map)
pvalues.out
}
23 changes: 18 additions & 5 deletions micone/pipelines/templates/network_inference/correlation/sparcc.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
#!/usr/bin/env bash

fastspar --iterations ${params.sparcc.iterations} --yes \
--otu_table $otu_file \
--correlation ${otu_file.baseName.split("_otu")[0]}_corr.tsv \
--covariance ${otu_file.baseName.split("_otu")[0]}_cov.tsv \
--threads ${params.sparcc.ncpus}
fastspar --iterations ${iterations} --yes \
--otu_table ${otu_file} \
--correlation ${meta.id}_corr.tsv \
--covariance ${meta.id}_cov.tsv \
--threads ${ncpus}

# Run fastspar on a single bootstrap replicate.
# The single positional argument is a "<number>,<path>" pair, as produced by
# the awk stage of the pipeline at the bottom of this script.
function do_fastspar {
    local number otu_table
    # Split on the first comma only: the remainder (the path) lands intact in
    # otu_table even if it contains further commas.
    IFS=',' read -r number otu_table <<< "\$1"
    # NOTE(review): every parallel job also asks fastspar for ${ncpus} threads,
    # so total thread demand can reach ncpus * ncpus -- confirm this is intended.
    fastspar --iterations ${iterations} --yes \
        --otu_table "\$otu_table" \
        --correlation "${meta.id}_\${number}_corr.boot" \
        --covariance "${meta.id}_\${number}_cov.boot" \
        --threads ${ncpus}
}
# GNU parallel starts each job in a new shell; a Bash function is only visible
# there if it has been exported.
export -f do_fastspar

# Number each bootstrap table (awk's NR) and hand "<number>,<path>" to the
# worker function. The awk field reference is escaped so that Nextflow's
# template interpolation leaves it for awk.
find . -name "*_otu.boot" | awk '{print NR "," \$0}' | parallel -j ${ncpus} do_fastspar

0 comments on commit 546e102

Please sign in to comment.