Skip to content

Commit

Permalink
Add: Config for otu_processing workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
dileep-kishore committed May 16, 2021
1 parent 8f58904 commit 6901f16
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 114 deletions.
35 changes: 35 additions & 0 deletions micone/pipelines/configs/otu_processing.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// -*- mode: groovy -*-

/*
* Reference for useful snippets:
publishDir "${params.output_dir}/${f[0]}/${f[1]}/${f[2]}/${meta.id}",
saveAs: { filename -> filename.split("/")[1] },
mode: 'copy',
overwrite: true
String task_process = "${task.process}"
f = getHierarchy(task_process)
*/

params {
    // Settings consumed by the otu_processing workflow modules
    // (export/biom2tsv.nf, transform/group.nf, transform/normalize.nf)
    otu_processing {
        export {
            // biom2tsv currently takes no tunable parameters; the empty
            // scope keeps the config structure uniform across processes
            'biom2tsv' {}
        }
        transform {
            'group' {
                // Taxonomy ranks the OTU table is collapsed to; the group
                // process runs once per rank via its `each tax_level` input
                tax_levels = ['Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
            }
            // NOTE: Set to "False" to disable filtering
            // NOTE: Set axis to "None" to disable normalization
            'normalize' {
                axis = "sample"            // axis passed to Otu.normalize(); "None" skips normalization
                count_thres = 500          // min total counts; used by rm_sparse_samples
                prevalence_thres = 0.05    // used by rm_sparse_obs
                abundance_thres = 0.01     // used by rm_sparse_obs
                rm_sparse_obs = "True"     // "True" enables sparse-observation filtering
                rm_sparse_samples = "True" // "True" enables sparse-sample filtering
            }
        }
    }
}
13 changes: 0 additions & 13 deletions micone/pipelines/modules/otu_processing/biom2tsv.nf

This file was deleted.

15 changes: 15 additions & 0 deletions micone/pipelines/modules/otu_processing/export/biom2tsv.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Export a biom-format OTU table (plus its children mapping) to
// tsv/csv tables via the otu_processing/export/biom2tsv.py template
process biom2tsv {
    label 'micone'
    tag "${meta.id}"
    publishDir "${params.output_dir}/${f[0]}/${f[1]}/${f[2]}/${meta.id}",
        mode: 'copy',
        overwrite: true
    input:
        tuple val(meta), val(tax_level), file(otu_file), file(children_file)
    output:
        // children_file is forwarded unchanged alongside the exported tables
        tuple val(meta), file("*_otu.tsv"), file("*_obs_metadata.csv"), file("*_sample_metadata.tsv"), file(children_file)
    script:
        String task_process = "${task.process}"
        // NOTE(review): getHierarchy is defined elsewhere in the pipeline;
        // presumably it splits the process name into the path components
        // consumed by publishDir above — confirm against its definition
        f = getHierarchy(task_process)
        template 'otu_processing/export/biom2tsv.py'
}
11 changes: 0 additions & 11 deletions micone/pipelines/modules/otu_processing/filter.nf

This file was deleted.

12 changes: 0 additions & 12 deletions micone/pipelines/modules/otu_processing/group.nf

This file was deleted.

11 changes: 0 additions & 11 deletions micone/pipelines/modules/otu_processing/normalize.nf

This file was deleted.

24 changes: 24 additions & 0 deletions micone/pipelines/modules/otu_processing/otu_processing_workflow.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Main variables to be defined
// NOTE: These should be defined before any include statements


// Sequencing processing imports
include { group } from './transform/group.nf'
include { normalize} from './transform/normalize.nf'
include { biom2tsv } from './export/biom2tsv.nf'

// Main workflow: normalize -> group -> biom2tsv export
// NOTE(review): the workflow name looks copy-pasted from the denoise_cluster
// module; this file is otu_processing_workflow.nf — consider renaming once
// all call sites are updated (kept as-is here to avoid breaking includes).
workflow denoise_cluster_workflow {
    take:
    // tuple val(id), file(otu_file)
    input_channel

    main:
    // BUGFIX: the pipeline previously piped into `export` and emitted
    // `export.out`, but the included export process is named `biom2tsv`
    // (see the include statements above) — `export` is undefined.
    // Also removed the dangling line-continuation before `emit:`.
    input_channel \
        | normalize \
        | group \
        | biom2tsv

    emit:
    // all processes have publishDir
    // tuple val(meta), file("*_otu.tsv"), file("*_obs_metadata.csv"), file("*_sample_metadata.tsv"), file(children_file)
    biom2tsv.out
}
16 changes: 16 additions & 0 deletions micone/pipelines/modules/otu_processing/transform/group.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Collapse the OTU table to each configured taxonomy level via the
// otu_processing/transform/group.py template (one task per level)
process group {
    label 'micone'
    tag "${meta.id}"
    publishDir "${params.output_dir}/${f[0]}/${f[1]}/${f[2]}/${meta.id}",
        mode: 'copy',
        overwrite: true
    input:
        tuple val(meta), file(otu_file)
        // NOTE(review): `each ... from ...` is DSL1 input syntax, while the
        // surrounding module is wired with DSL2 includes — confirm this parses
        // under the pipeline's Nextflow version. The DSL2 form would drop the
        // `from` clause and pass the tax_levels list as a second argument at
        // the workflow call site (which would change the call signature).
        each tax_level from params.otu_processing.transform['group']['tax_levels']
    output:
        // *.biom = collapsed table, *.json = parent->children taxa mapping
        tuple val(meta), val(tax_level), file("*.biom"), file("*.json")
    script:
        String task_process = "${task.process}"
        // NOTE(review): getHierarchy is defined elsewhere; presumably yields
        // the publishDir path components — confirm against its definition
        f = getHierarchy(task_process)
        template 'otu_processing/transform/group.py'
}
21 changes: 21 additions & 0 deletions micone/pipelines/modules/otu_processing/transform/normalize.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Optionally filter sparse samples/observations and normalize the OTU
// table via the otu_processing/transform/normalize.py template
process normalize {
    label 'micone'
    tag "${meta.id}"
    publishDir "${params.output_dir}/${f[0]}/${f[1]}/${f[2]}/${meta.id}",
        mode: 'copy',
        overwrite: true
    input:
        tuple val(meta), file(otu_file)
    output:
        tuple val(meta), file("*_normalized.biom")
    script:
        String task_process = "${task.process}"
        f = getHierarchy(task_process)
        // Bind the 'normalize' config settings into the template scope
        def settings = params.otu_processing.transform['normalize']
        axis = settings['axis']
        rm_sparse_samples = settings['rm_sparse_samples']
        rm_sparse_obs = settings['rm_sparse_obs']
        count_thres = settings['count_thres']
        prevalence_thres = settings['prevalence_thres']
        abundance_thres = settings['abundance_thres']
        template 'otu_processing/transform/normalize.py'
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ def main(biom_file, base_name):


if __name__ == "__main__":
    # Template placeholders are substituted by Nextflow before execution.
    # BUGFIX: the scraped diff residue duplicated these assignments with the
    # stale "$otu_file"/"$level" forms; only the post-diff bindings remain.
    BIOM_FILE = "${otu_file}"
    BASE_NAME = "${tax_level}"
    main(BIOM_FILE, BASE_NAME)
24 changes: 0 additions & 24 deletions micone/pipelines/templates/otu_processing/filter.py

This file was deleted.

28 changes: 0 additions & 28 deletions micone/pipelines/templates/otu_processing/group.py

This file was deleted.

25 changes: 25 additions & 0 deletions micone/pipelines/templates/otu_processing/transform/group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python3

# Script that groups OTU data on different taxa levels

import json
from typing import Tuple

from micone import Otu


# Group the otu_data on a single taxonomy level
def grp_otu_data(otu_data: Otu, tax_level: str) -> Tuple[Otu, dict]:
    """Collapse ``otu_data`` to ``tax_level``.

    Returns the collapsed Otu table and the parent-to-children taxa mapping
    produced by ``Otu.collapse_taxa``.
    """
    collapsed, lineage_map = otu_data.collapse_taxa(tax_level)
    return collapsed, lineage_map


if __name__ == "__main__":
    # ${...} placeholders are substituted by Nextflow before execution
    TAX_LEVEL: str = "${tax_level}"  # e.g. 'Family', 'Genus', 'Species'
    OTU_FILE = "${otu_file}"  # "otu.biom"
    otu_data = Otu.load_data(OTU_FILE)
    collapsed, lineage_map = grp_otu_data(otu_data, TAX_LEVEL)
    base_name = collapsed.tax_level + "_level"
    # Write the collapsed table and the taxa children mapping side by side
    collapsed.write(base_name, file_type="biom")
    with open(base_name + "_children.json", "w") as fid:
        json.dump(lineage_map, fid, indent=2, sort_keys=True)
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,35 @@

def main(
    otu_file: str,
    rm_sparse_samples: str,
    rm_sparse_obs: str,
    axis: str,
    count_thres: int,
    prevalence_thres: float,
    abundance_thres: float,
) -> Otu:
    """Load, optionally filter, and optionally normalize an OTU table.

    BUGFIX: the scraped diff residue interleaved the removed (bool-flag)
    lines with the added (string-flag) lines, yielding duplicate parameters
    and duplicate conditionals; this is the clean post-diff version.

    Args:
        otu_file: path to the input biom-format OTU table.
        rm_sparse_samples: "True" drops samples below count_thres.
        rm_sparse_obs: "True" drops observations below the prevalence and
            abundance thresholds.
        axis: axis passed to Otu.normalize; "None" skips normalization.
        count_thres: minimum total counts for rm_sparse_samples.
        prevalence_thres: prevalence cutoff for rm_sparse_obs.
        abundance_thres: abundance cutoff for rm_sparse_obs.

    Returns:
        The filtered (and possibly normalized) Otu instance.
    """
    otu = Otu.load_data(otu_file)
    # Flags arrive as Nextflow-substituted strings, hence the "True" compares
    if rm_sparse_samples == "True":
        otu = otu.rm_sparse_samples(count_thres=count_thres)
    if rm_sparse_obs == "True":
        otu = otu.rm_sparse_obs(
            prevalence_thres=prevalence_thres, abundance_thres=abundance_thres
        )
    # "None" (string) disables the normalization step entirely
    if axis != "None":
        otu_norm = otu.normalize(axis=axis)
    else:
        otu_norm = otu
    return otu_norm


if __name__ == "__main__":
OTU_FILE = "$otu_file"
AXIS = "$axis"
RM_SPARSE_SAMPLES = ${params.normalize.rm_sparse_samples}
COUNT_THRES = ${params.normalize.count_thres}
RM_SPARSE_OBS = ${params.normalize.rm_sparse_obs}
PREVALENCE_THRES = ${params.normalize.prevalence_thres}
ABUNDANCE_THRES = ${params.normalize.abundance_thres}
OTU_FILE = "${otu_file}"
AXIS = "${axis}"
RM_SPARSE_SAMPLES = "${rm_sparse_samples}"
COUNT_THRES = ${count_thres}
RM_SPARSE_OBS = "${rm_sparse_obs}"
PREVALENCE_THRES = ${prevalence_thres}
ABUNDANCE_THRES = ${abundance_thres}
norm_otu = main(
OTU_FILE,
RM_SPARSE_SAMPLES,
Expand All @@ -39,4 +43,4 @@ def main(
PREVALENCE_THRES,
ABUNDANCE_THRES,
)
norm_otu.write("$id" + "_normalized")
norm_otu.write("${meta.id}" + "_normalized")

0 comments on commit 6901f16

Please sign in to comment.