/
wf_bacterial_orthology.cwl
176 lines (171 loc) · 4.96 KB
/
wf_bacterial_orthology.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env cwl-runner
# Workflow header and the runtime features the workflow asks the runner for.
label: bacterial_orthology
cwlVersion: v1.2
class: Workflow
requirements:
  # Declared for the step definitions referenced through `run:`
  # (presumably some of them are nested workflows -- confirm against
  # ../task_types/*.cwl).
  - class: SubworkflowFeatureRequirement
  # Declared because step inputs in this workflow use `linkMerge`.
  - class: MultipleInputFeatureRequirement
  # `networkAccess` is a field of this requirement entry, so it must be
  # nested under the same list item rather than sit at document level.
  - class: NetworkAccess
    networkAccess: true
# Workflow-level input parameters. Each entry is `<id>: <CWL type>`; the
# comments name the step inputs in this file that consume each parameter.
inputs:
  # Annotation to process; feeds prepare_annotation_input and
  # extract_protein_targets_for_orthology.
  input: File
  # Passed to the BLASTp step as `taxid`.
  taxid: int
  # NOTE(review): not referenced by any step in this file -- presumably kept
  # for interface compatibility with callers; confirm before removing.
  naming_sqlite: File
  # Passed to the BLASTp step as `taxon_db`.
  taxon_db: File
  # Passed to get_assemblies_for_orthologs_gencoll_asn as `gc_cache`.
  gc_cache: File
  # Shared by the fetch-sequences, makeblastdb, BLASTp and find-orthologs
  # steps as `asn_cache`.
  asn_cache: Directory[]
  # Optional (`?`); forwarded to the BLASTp step as `blast_hits_cache`.
  blast_hits_cache: File?
  # Passed to the BLASTp step as `genus_list`.
  genus_list: int[]
  # Flattened into the BLASTp step's `blastdb` input.
  blastdb: string[]
  # Typed as string here; forwarded unchanged to the BLASTp step.
  scatter_gather_nchunks: string
  # Used as `gc1` by find_prokarotic_orthologs.
  gencoll_asn: File
  # Passed to get_assemblies_for_orthologs_gencoll_asn as `gc_id_list`.
  gc_id_list_orth: File
# Workflow outputs. The single result is the file emitted by the final
# propagate_symbols_to_genes step.
outputs:
  output:
    type: File
    outputSource: propagate_symbols_to_genes/output
# Workflow steps. Each step maps workflow inputs or upstream step outputs
# (`<step>/<output>`) onto the inputs of the tool named by `run`; constant
# parameters are supplied through `default`.
steps:
  # Runs asn_translator on the workflow `input`, naming the result
  # 'input_text.asn'. Its output is reused by the two final steps.
  prepare_annotation_input:
    label: "Prepare Annotation Input"
    run: ../progs/asn_translator.cwl
    in:
      input: input
      output_name:
        default: 'input_text.asn'
    out: [output]

  # Produces `gencoll_asn` from the `gc_id_list_orth` list and `gc_cache`.
  get_assemblies_for_orthologs_gencoll_asn:
    label: "Get Assemblies for Orthologs GenColl ASN"
    run: ../task_types/tt_gcaccess_from_list.cwl
    in:
      gc_id_list: gc_id_list_orth
      gc_cache: gc_cache
    out: [gencoll_asn]

  # Derives `molecules` from the ortholog assemblies' GenColl ASN.
  get_ortholog_nucleotide_ids:
    label: "Get Assemblies for Orthologs GenColl ASN/nucleotide ids"
    run: ../progs/gc_get_molecules.cwl
    in:
      gc_assembly: get_assemblies_for_orthologs_gencoll_asn/gencoll_asn
    out: [molecules]

  # Fetches sequences for the molecules listed by the previous step.
  # The output file name spelling is kept as-is: it is a runtime string.
  extract_ortholog_nucleotide_asn:
    label: "Extract Orthologous Prokaryotic Proteins: fetch nucleotide ASN.1"
    run: ../progs/gp_fetch_sequences.cwl
    in:
      asn_cache: asn_cache
      input: get_ortholog_nucleotide_ids/molecules
      out_asn_name:
        default: 'orhologous.nucleotide.asn'
    out: [output]

  # Runs protein_extract on the fetched ortholog ASN; these proteins become
  # the BLAST database content below.
  extract_protein_references_for_orthology:
    label: "Extract Orthologous Prokaryotic Proteins"
    run: ../progs/protein_extract.cwl
    in:
      input: extract_ortholog_nucleotide_asn/output
      nogenbank:
        default: true
    out: [proteins]

  # Runs the same protein_extract tool on the workflow `input`; here the
  # `lds2` and `seqids` outputs are collected as well for the BLASTp step.
  extract_protein_targets_for_orthology:
    label: "Extract Protein Targets for Orthology"
    run: ../progs/protein_extract.cwl
    in:
      input: input
      nogenbank:
        default: true
    out: [proteins, lds2, seqids]

  # Builds a protein ('prot') BLAST database from the reference proteins.
  # The title spelling is kept as-is: it is a runtime string.
  create_orthologous_prokaryotic_protein_blastdb:
    label: "Create Orthologous Prokaryotic Protein BLASTdb"
    run: ../progs/gp_makeblastdb.cwl
    in:
      asn_cache: asn_cache
      asnb: extract_protein_references_for_orthology/proteins
      dbtype:
        default: prot
      title:
        default: 'protein database for orhology graph'
    out: [blastdb]

  # BLASTp of the target proteins against the database built above.
  # All search parameters are pinned here via `default`.
  blastp_current_prokaryotic_proteins_vs_orthologs:
    label: "BLASTp Current Prokaryotic Proteins vs Orthologs"
    run: ../task_types/tt_blastp_wnode_naming.cwl
    in:
      scatter_gather_nchunks: scatter_gather_nchunks
      # files/directories
      ids:
        source: [extract_protein_targets_for_orthology/seqids]
        linkMerge: merge_flattened
      lds2: extract_protein_targets_for_orthology/lds2
      proteins: extract_protein_targets_for_orthology/proteins
      blastdb_dir: create_orthologous_prokaryotic_protein_blastdb/blastdb
      # Workflow input `blastdb` (string[]) is passed through as a flat list.
      blastdb:
        source:
          - blastdb
        linkMerge: merge_flattened
      affinity:
        default: subject
      asn_cache: asn_cache
      align_filter:
        default: 'score>0 && pct_identity_gapopen_only > 35'
      allow_intersection:
        default: true
      # Quoted so the single-letter value is unambiguously a string.
      comp_based_stats:
        default: 'F'
      compart:
        default: true
      dbsize:
        default: '6000000000'
      delay:
        default: 0
      evalue:
        default: 0.1
      max_batch_length:
        default: 10000
      max_jobs:
        default: 1
      max_target_seqs:
        default: 50
      no_merge:
        default: false
      nogenbank:
        default: true
      ofmt:
        default: asn-binary
      seg:
        default: '30 2.2 2.5'
      soft_masking:
        default: 'yes'
      threshold:
        default: 21
      top_by_score:
        default: 10
      word_size:
        default: 6
      taxid: taxid
      genus_list: genus_list
      blast_hits_cache:
        source: blast_hits_cache
      blast_type:
        default: 'predicted-protein'
      taxon_db: taxon_db
    out: [blast_align]

  # Passes the BLASTp `blast_align` output through cat.cwl, naming the
  # result 'blastp.align.asn'.
  cat_aligns:
    label: "BLASTp Current Prokaryotic Proteins vs Orthologs/cat aligns"
    run: ../progs/cat.cwl
    in:
      input: blastp_current_prokaryotic_proteins_vs_orthologs/blast_align
      output_file_name:
        default: 'blastp.align.asn'
    out: [output]

  # Combines both GenColl assemblies, the prepared annotation and the
  # concatenated protein hits. The step id spelling is kept because
  # propagate_symbols_to_genes references it by this name.
  find_prokarotic_orthologs:
    label: "Find Prokaryotic Orthologs"
    run: ../progs/find_orthologs.cwl
    in:
      gc1: gencoll_asn
      gc2: get_assemblies_for_orthologs_gencoll_asn/gencoll_asn
      annots1: prepare_annotation_input/output
      prot_hits: cat_aligns/output
      asn_cache: asn_cache
    out: [orthologs]

  # Final step: its `output` is the workflow's only output.
  propagate_symbols_to_genes:
    label: "Propagate symbols to genes"
    run: ../progs/propagate_symbols_to_genes.cwl
    in:
      orthologs: find_prokarotic_orthologs/orthologs
      input: prepare_annotation_input/output
      it:
        default: true
    out: [output]