-
Notifications
You must be signed in to change notification settings - Fork 6
/
tmb_workflow.cwl
182 lines (169 loc) · 5.88 KB
/
tmb_workflow.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env cwl-runner
cwlVersion: v1.2
class: Workflow
# Human-readable summary of what this workflow does.
doc: "
  Workflow to run the TMB analysis on a batch of samples
  and merge the results back into a single data clinical file
  "
# Optional CWL features this document relies on.
requirements:
  - class: MultipleInputFeatureRequirement
  # needed for `scatter` on the run_tmb_workflow step
  - class: ScatterFeatureRequirement
  # needed for `valueFrom` on step inputs
  - class: StepInputExpressionRequirement
  # needed for the JavaScript expressions used in `valueFrom` / `expression`
  - class: InlineJavascriptRequirement
  # needed because run_tmb_workflow embeds a nested Workflow
  - class: SubworkflowFeatureRequirement
  # presumably supplies the record types referenced as `types.yml#...` — confirm against types.yml
  - $import: types.yml
inputs:
  # NOTE: breadth of coverage for different target exome assays, used in TMB analysis
  # assay_coverages = {
  #   "IMPACT341": 896637,
  #   "IMPACT468": 1139294,
  #   "IMPACT410": 1016335,
  #   "IMPACT505": 1213770
  # }
  assay_coverage:
    type: string
    doc: 'genome_coverage value; amount of the genome in bp covered by the assay'
  # List of pair records to analyze; each record is scattered over by the
  # run_tmb_workflow step, which reads its pair_maf, tumor_id, normal_id and
  # pair_id fields.
  pairs:
    type: 'types.yml#TMBInputPair[]'
steps:
  # run the TMB analysis for each tumor sample in the list of pairs
  run_tmb_workflow:
    scatter: pair
    in:
      pair: pairs
      # the remaining inputs are unpacked from the scattered pair record
      mutations_file:
        valueFrom: '$(inputs.pair.pair_maf)'
      sample_id:
        valueFrom: '$(inputs.pair.tumor_id)'
      normal_id:
        valueFrom: '$(inputs.pair.normal_id)'
      pair_id:
        valueFrom: '$(inputs.pair.pair_id)'
      assay_coverage: assay_coverage
    out:
      - pair
    run:
      # per-pair sub-workflow: filter maf -> compute TMB -> format table -> bundle outputs
      class: Workflow
      inputs:
        mutations_file:
          type: File
          doc: 'File with mutations for the sample'
        assay_coverage:
          type: string
          doc: 'genome_coverage value; amount of the genome in bp covered by the assay'
        sample_id: string
        normal_id: string
        pair_id: string
      outputs:
        pair:
          type: 'types.yml#TMBOutputPair'
          outputSource: create_tmb_pair_output/pair
      steps:
        muts_maf_filter:
          doc: run the cBioPortal maf filter on the input mutations
          run: maf_filter.cwl
          in:
            pair_id: pair_id
            maf_file: mutations_file
            argos_version_string:
              valueFrom: '$("")'
            is_impact:
              valueFrom: '$(true)'
            cbio_mutation_data_filename:
              valueFrom: '$(inputs.pair_id + ".tmb.maf")'
          out:
            - cbio_mutation_data_file

        # filter_variants:
        #   doc: filter the variant maf file for only the variants desired for use in TMB calculation
        #   run: tmb_variant_filter.cwl
        #   in:
        #     pair_id: pair_id
        #     input_file: muts_maf_filter/cbio_mutation_data_file # mutations_file
        #     output_filename:
        #       valueFrom: ${ return inputs.pair_id + ".tmb.maf"; }
        #   out:
        #     [ output_file ]

        calc_tmb_value:
          doc: calculate the TMB for the variants present based on assay coverage
          run: calc-tmb.cwl
          in:
            pair_id: pair_id
            input_file: muts_maf_filter/cbio_mutation_data_file
            # input_file: filter_variants/output_file
            output_filename:
              valueFrom: '$(inputs.pair_id + ".tmb.txt")'
            genome_coverage: assay_coverage
            normal_id: normal_id
          out:
            - output_file

        fix_tmb_header:
          doc: turn the TMB value into a table format with header
          run: add_header.cwl
          in:
            input_file: calc_tmb_value/output_file
            header_str:
              valueFrom: '$("CMO_TMB_SCORE")'
          out:
            - output_file

        add_sampleID:
          doc: add the sample ID back to the TMB table file
          run: paste-col.cwl
          in:
            pair_id: pair_id
            input_file: fix_tmb_header/output_file
            # NOTE: we plan to concat this file later so it needs to have a unique filename !!
            output_filename:
              valueFrom: '$(inputs.pair_id + ".tmb.tsv")'
            header:
              # TODO: Change this to SAMPLE_ID
              valueFrom: '$("SampleID")'
            value: sample_id
          out:
            - output_file

        # TODO: add this !! otherwise its difficult to know what values were used later!
        # NOTE: requires updating the cBioPortal file header schema to include new column
        # add_assay_coverage:
        #   doc: add the assay coverage to the table
        #   run: paste-col.cwl
        #   in:
        #     pair_id: pair_id
        #     input_file: add_sampleID/output_file
        #     output_filename: # NOTE: we plan to concat this file later so it needs to have a unique filename !!
        #       valueFrom: ${ return inputs.pair_id + ".tmb.tsv"; }
        #     header:
        #       valueFrom: ${ return "CMO_ASSAY_COVERAGE"; }
        #     value: assay_coverage
        #   out:
        #     [ output_file ]

        create_tmb_pair_output:
          doc: gather the TMB analysis outputs into a pair entry
          in:
            pair_id: pair_id
            tumor_id: sample_id
            normal_id: normal_id
            tmb_maf: muts_maf_filter/cbio_mutation_data_file  # filter_variants/output_file
            tmb_tsv: add_sampleID/output_file
          out:
            - pair
          run:
            class: ExpressionTool
            inputs:
              pair_id: string
              tumor_id: string
              normal_id: string
              tmb_maf: File
              tmb_tsv: File
            outputs:
              pair: 'types.yml#TMBOutputPair'
            # build the output record directly from the step inputs
            expression: |
              ${
                return {
                  "pair": {
                    "pair_id": inputs.pair_id,
                    "tumor_id": inputs.tumor_id,
                    "normal_id": inputs.normal_id,
                    "tmb_maf": inputs.tmb_maf,
                    "tmb_tsv": inputs.tmb_tsv
                  }
                };
              }
outputs:
  # Collected `pair` outputs of the scattered run_tmb_workflow step,
  # one TMBOutputPair record per scatter iteration.
  pairs:
    type: 'types.yml#TMBOutputPair[]'
    outputSource: run_tmb_workflow/pair