/
wf_protein_alignment.cwl
executable file
·115 lines (102 loc) · 2.82 KB
/
wf_protein_alignment.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env cwl-runner
# CWL workflow document: declares the document version, the process class,
# a human-readable label and the features the runner must support.
cwlVersion: v1.0
class: Workflow
label: 'Align reference proteins plane complete workflow'

# NOTE(review): presumably needed because some `run:` targets of the steps
# (e.g. the wf_*.cwl files) are themselves workflows -- confirm.
requirements:
  - class: SubworkflowFeatureRequirement
# Workflow-level inputs. Each one is forwarded to one or more steps by name.
inputs:
  # Passed to the seeding, sorting and filtering steps.
  asn_cache:
    type: Directory
  # Passed to Get_Proteins and to every step that also takes asn_cache.
  uniColl_asn_cache:
    type: Directory
  # Consumed only by Get_Proteins.
  naming_sqlite:
    type: File
  # Consumed only by Seed_Protein_Alignments.
  blastdb_dir:
    type: Directory
  # Consumed by Get_Proteins and Compute_Gencode.
  taxid:
    type: int
  # Consumed by Get_Proteins (as tax_sql_file) and Compute_Gencode (as taxon_db).
  tax_sql_file:
    type: File
  # Consumed only by compart_filter_prosplign.
  gc_assembly:
    type: File
  # Consumed only by Seed_Search_Compartments.
  compartments:
    type: File
# Workflow-level outputs; every one is a single File taken from a step output.
outputs:
  # Exposed directly from the Get_Proteins step.
  universal_clusters:
    outputSource: Get_Proteins/universal_clusters
    type: File
  # The two results of the last step, Filter_Protein_Alignments_I.
  align:
    outputSource: Filter_Protein_Alignments_I/align
    type: File
  align_non_match:
    outputSource: Filter_Protein_Alignments_I/align_non_match
    type: File
# Step graph. Data flow as wired below:
#   Get_Proteins ------------------------------> Seed_Protein_Alignments --+
#   Compute_Gencode -> Compute_Gencode_int --+-> Seed_Protein_Alignments   |
#                                            +-> Seed_Search_Compartments -+-> cat
#   cat -> Sort_Seed_Hits -> Filter_Full_Coverage_Alignments_I
#   Filter_Full_Coverage_Alignments_I/blast_partial_cov -> compart_filter_prosplign
#   blast_full_cov + prosplign_align -> Filter_Protein_Alignments_I
steps:
  # Produces universal_clusters (also a workflow output) and all_prots
  # (used as `seqids` by Seed_Protein_Alignments).
  Get_Proteins:
    run: bacterial_prot_src.cwl
    in:
      uniColl_asn_cache: uniColl_asn_cache
      naming_sqlite: naming_sqlite
      taxid: taxid
      tax_sql_file: tax_sql_file
    out:
      - universal_clusters
      - all_prots

  # Runs compute_gencode.cwl on the taxid and taxonomy database, with the
  # `gencode` input fixed to true.
  Compute_Gencode:
    run: ../progs/compute_gencode.cwl
    in:
      taxid: taxid
      taxon_db: tax_sql_file
      gencode:
        default: true
    out:
      - output

  # Feeds the Compute_Gencode output file through file2int.cwl; the resulting
  # `value` is used as db_gencode by both seed steps.
  # NOTE(review): presumably reads an integer out of the file -- confirm in file2int.cwl.
  Compute_Gencode_int:
    run: ../progs/file2int.cwl
    in:
      input: Compute_Gencode/output
    out:
      - value

  # Seed step driven by the `compartments` workflow input.
  Seed_Search_Compartments:
    run: wf_seed.cwl
    in:
      db_gencode: Compute_Gencode_int/value
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      compartments: compartments
    out:
      - blast_align

  # Seed step driven by the protein ids from Get_Proteins and the blastdb_dir input.
  Seed_Protein_Alignments:
    run: wf_seed_1.cwl
    in:
      db_gencode: Compute_Gencode_int/value
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      seqids: Get_Proteins/all_prots
      blastdb_dir: blastdb_dir
    out:
      - blast_align

  # Combines the blast_align outputs of the two seed steps into one file_out.
  # Input order: protein-alignment seeds first, compartment seeds second.
  cat:
    run: cat.cwl
    in:
      file_in_1: Seed_Protein_Alignments/blast_align
      file_in_2: Seed_Search_Compartments/blast_align
    out:
      - file_out

  # Runs align_sort.cwl over the combined seed hits; limit_mem is fixed to the
  # string 13G here rather than exposed as a workflow input.
  Sort_Seed_Hits:
    run: align_sort.cwl
    in:
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      blast_aligns: cat/file_out
      limit_mem:
        default: "13G"
    out:
      - sorted_aligns

  # Takes the sorted seeds and emits two outputs: blast_full_cov goes straight
  # to the final filter, blast_partial_cov goes to compart_filter_prosplign.
  Filter_Full_Coverage_Alignments_I:
    run: bacterial_protalign_filter.cwl
    in:
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      sorted_seeds: Sort_Seed_Hits/sorted_aligns
    out:
      - blast_full_cov
      - blast_partial_cov

  # Processes the blast_partial_cov hits together with gc_assembly and yields
  # prosplign_align.
  compart_filter_prosplign:
    run: wf_compart_filter_prosplign.cwl
    in:
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      seed_hits: Filter_Full_Coverage_Alignments_I/blast_partial_cov
      gc_assembly: gc_assembly
    out:
      - prosplign_align

  # Final step: takes blast_full_cov and prosplign_align and produces the
  # workflow's `align` and `align_non_match` outputs.
  Filter_Protein_Alignments_I:
    run: wf_align_filter.cwl
    in:
      asn_cache: asn_cache
      uniColl_asn_cache: uniColl_asn_cache
      blast_full: Filter_Full_Coverage_Alignments_I/blast_full_cov
      prosplign: compart_filter_prosplign/prosplign_align
    out:
      - align
      - align_non_match