/
module-1.scatter.chunk.cwl
executable file
·204 lines (192 loc) · 5.13 KB
/
module-1.scatter.chunk.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env cwl-runner
# Prefix -> IRI bindings for the metadata vocabularies used by the
# doap:* / dct:* / foaf:* keys in this document.
$namespaces:
  dct: http://purl.org/dc/terms/
  foaf: http://xmlns.com/foaf/0.1/
  # Quoted so the trailing '#' can never be mistaken for a comment marker.
  doap: 'http://usefulinc.com/ns/doap#'

# Schema documents that define the vocabularies bound above.
$schemas:
  - http://dublincore.org/2012/06/14/dcterms.rdf
  - http://xmlns.com/foaf/spec/20140114.rdf
  - 'http://usefulinc.com/ns/doap#'
# Release records: one for this workflow, one for the "cwl-wrapper" entry.
# Revisions are quoted so they always load as strings.
doap:release:
  - class: doap:Version
    doap:name: module-1.scatter.chunk
    doap:revision: "1.0.0"
  - class: doap:Version
    doap:name: cwl-wrapper
    doap:revision: "1.0.0"

# Creator of this document.
dct:creator:
  - class: foaf:Organization
    foaf:name: Memorial Sloan Kettering Cancer Center
    foaf:member:
      - class: foaf:Person
        foaf:name: Christopher Harris
        foaf:mbox: mailto:harrisc2@mskcc.org

# Contributors to this document.
dct:contributor:
  - class: foaf:Organization
    foaf:name: Memorial Sloan Kettering Cancer Center
    foaf:member:
      - class: foaf:Person
        foaf:name: Christopher Harris
        foaf:mbox: mailto:harrisc2@mskcc.org
      - class: foaf:Person
        foaf:name: Jaeyoung Chun
        foaf:mbox: mailto:chunj@mskcc.org
# Top-level CWL v1.0 workflow. Its steps chunk the input FASTQs, flatten the
# chunk arrays, align each chunk in an inline sub-workflow, and finally run
# MarkDuplicates over the per-chunk BAMs.
cwlVersion: v1.0
class: Workflow
label: module-1-scatter-chunk

# Optional CWL features this workflow relies on.
requirements:
  # NOTE(review): no step input with more than one source is visible in this
  # file; kept as declared by the original author.
  MultipleInputFeatureRequirement: {}
  # The `chunking` and `align` steps use `scatter`.
  ScatterFeatureRequirement: {}
  # The `align` step runs an inline `class: Workflow`.
  SubworkflowFeatureRequirement: {}
  # Step inputs are computed with `${ ... }` JavaScript bodies.
  InlineJavascriptRequirement: {}
  # Required by CWL v1.0 whenever a workflow step input uses `valueFrom`
  # (bwa.output, add_rg_id.O, mark_duplicates.O and mark_duplicates.M here).
  # It was missing, which spec-enforcing runners reject at load time.
  StepInputExpressionRequirement: {}
# Workflow inputs. Every value is passed as a string (or array of strings),
# not as a CWL File object.
inputs:
  # Per-read-group arrays. fastq1, fastq2 and add_rg_PU are scattered
  # element-wise (dotproduct) by the `chunking` step; add_rg_ID and add_rg_PU
  # are also handed to the `flatten` step.
  fastq1:
    type: string[]
  fastq2:
    type: string[]
  add_rg_ID:
    type: string[]
  add_rg_PU:
    type: string[]

  # Forwarded to the trim_galore step of the inline alignment sub-workflow.
  adapter:
    type: string
  adapter2:
    type: string

  # Forwarded to the bwa step: `genome` as-is, `bwa_output` as the base name
  # from which each chunk's BAM file name is derived.
  genome:
    type: string
  bwa_output:
    type: string

  # Forwarded to the add_rg_id step as LB / PL / SM / CN.
  add_rg_LB:
    type: string
  add_rg_PL:
    type: string
  add_rg_SM:
    type: string
  add_rg_CN:
    type: string

  # Forwarded as TMP_DIR to both the add_rg_id and mark_duplicates steps.
  tmp_dir:
    type: string

  # Consumed by mark_duplicates: `group` is spliced into the output BAM name,
  # `opt_dup_pix_dist` feeds OPTICAL_DUPLICATE_PIXEL_DISTANCE.
  group:
    type: string
  opt_dup_pix_dist:
    type: string
# Workflow outputs.
outputs:
  # One stats file per aligned chunk, gathered from the scattered `align` step.
  clstats1:
    type: File[]
    outputSource: align/clstats1
  clstats2:
    type: File[]
    outputSource: align/clstats2

  # Single BAM and metrics file produced by the `mark_duplicates` step.
  bam:
    type: File
    outputSource: mark_duplicates/bam
  md_metrics:
    type: File
    outputSource: mark_duplicates/mdmetrics
# Pipeline stages: chunking -> flatten -> align (scattered) -> mark_duplicates.
steps:
  # Runs cmo-split-reads once per (fastq1, fastq2, platform_unit) triple;
  # dotproduct pairs the three input arrays element-wise.
  chunking:
    run: cmo-split-reads/1.0.1/cmo-split-reads.cwl
    hints:
      ResourceRequirement:
        # NOTE(review): the CWL spec defines ramMin in mebibytes - confirm
        # that 24 is the intended amount for the target runner.
        ramMin: 24
        coresMin: 1
    in:
      fastq1: fastq1
      fastq2: fastq2
      platform_unit: add_rg_PU
    scatter:
      - fastq1
      - fastq2
      - platform_unit
    scatterMethod: dotproduct
    out:
      - chunks1
      - chunks2

  # Feeds the per-pair chunk arrays plus the read-group ID/PU arrays through
  # flatten-array-fastq; presumably yields flat, index-aligned arrays, since
  # `align` dot-products its four outputs - verify against the tool.
  flatten:
    run: flatten-array/1.0.0/flatten-array-fastq.cwl
    in:
      fastq1: chunking/chunks1
      fastq2: chunking/chunks2
      add_rg_ID: add_rg_ID
      add_rg_PU: add_rg_PU
    out:
      - chunks1
      - chunks2
      - rg_ID
      - rg_PU

  # Runs the inline sub-workflow below once per chunk: the chunk pair and its
  # read-group ID/PU are scattered together, all other inputs are shared.
  align:
    in:
      chunkfastq1: flatten/chunks1
      chunkfastq2: flatten/chunks2
      adapter: adapter
      adapter2: adapter2
      genome: genome
      bwa_output: bwa_output
      add_rg_LB: add_rg_LB
      add_rg_PL: add_rg_PL
      add_rg_ID: flatten/rg_ID
      add_rg_PU: flatten/rg_PU
      add_rg_SM: add_rg_SM
      add_rg_CN: add_rg_CN
      tmp_dir: tmp_dir
    scatter:
      - chunkfastq1
      - chunkfastq2
      - add_rg_ID
      - add_rg_PU
    scatterMethod: dotproduct
    out:
      - clstats1
      - clstats2
      - bam
    # Per-chunk pipeline: trim_galore -> bwa -> add_rg_id.
    run:
      class: Workflow
      inputs:
        chunkfastq1: File
        chunkfastq2: File
        adapter: string
        genome: string
        adapter2: string
        bwa_output: string
        add_rg_LB: string
        add_rg_PL: string
        add_rg_ID: string
        add_rg_PU: string
        add_rg_SM: string
        add_rg_CN: string
        tmp_dir: string
      outputs:
        clstats1:
          type: File
          outputSource: trim_galore/clstats1
        clstats2:
          type: File
          outputSource: trim_galore/clstats2
        bam:
          type: File
          outputSource: add_rg_id/bam
      steps:
        trim_galore:
          run: ./cmo-trimgalore/0.2.5.mod/cmo-trimgalore.cwl
          in:
            fastq1: chunkfastq1
            fastq2: chunkfastq2
            adapter: adapter
            adapter2: adapter2
          out:
            - clfastq1
            - clfastq2
            - clstats1
            - clstats2

        bwa:
          run: ./cmo-bwa-mem/0.7.5a/cmo-bwa-mem.cwl
          in:
            fastq1: trim_galore/clfastq1
            fastq2: trim_galore/clfastq2
            basebamname: bwa_output
            # Output name = base BAM name with the "chunkNNN" token taken
            # from the fastq1 file name inserted before ".bam".
            output:
              valueFrom: |
                ${ return inputs.basebamname.replace(".bam", "." + inputs.fastq1.basename.match(/chunk\d\d\d/)[0] + ".bam");}
            genome: genome
          out:
            - bam

        add_rg_id:
          run: ./cmo-picard.AddOrReplaceReadGroups/2.9/cmo-picard.AddOrReplaceReadGroups.cwl
          in:
            I: bwa/bam
            # Output name = input BAM name with ".bam" replaced by ".rg.bam".
            O:
              valueFrom: |
                ${ return inputs.I.basename.replace(".bam", ".rg.bam") }
            LB: add_rg_LB
            PL: add_rg_PL
            ID: add_rg_ID
            PU: add_rg_PU
            SM: add_rg_SM
            CN: add_rg_CN
            SO:
              default: coordinate
            TMP_DIR: tmp_dir
          out:
            - bam
            - bai

  # Takes the array of per-chunk BAMs from `align`. Output names are derived
  # from the first BAM's file name by replacing its ".chunkNNN.rg.bam" suffix.
  mark_duplicates:
    run: ./cmo-picard.MarkDuplicates/2.9/cmo-picard.MarkDuplicates.cwl
    in:
      group: group
      OPTICAL_DUPLICATE_PIXEL_DISTANCE: opt_dup_pix_dist
      I: align/bam
      O:
        valueFrom: |
          ${ return inputs.I[0].basename.replace(/\.chunk\d\d\d\.rg\.bam/, "."+ inputs.group+".rg.md.bam") }
      M:
        valueFrom: |
          ${ return inputs.I[0].basename.replace(/\.chunk\d\d\d\.rg\.bam/, ".rg.md_metrics") }
      TMP_DIR: tmp_dir
    out:
      - bam
      - bai
      - mdmetrics