-
Notifications
You must be signed in to change notification settings - Fork 6
/
analysis-workflow.cwl
195 lines (186 loc) · 6.69 KB
/
analysis-workflow.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env cwl-runner
# Workflow that produces the per-project analysis files (gene-level CNA table,
# mutations maf, shareable mutations maf, segment CNA table and SVs maf) and
# collects them into a single output directory named "analysis".
cwlVersion: v1.0
class: Workflow
requirements:
  # Optional CWL features used by this workflow, written in map form
  # (requirement class name -> options; `{}` means "no options").
  # `scatter:` is used by muts_maf_filter and reduce_sig_figs_hisens_segs.
  ScatterFeatureRequirement: {}
  # `valueFrom:` is used on several step inputs.
  StepInputExpressionRequirement: {}
  # The `${ ... }` bodies in those `valueFrom` fields are JavaScript.
  InlineJavascriptRequirement: {}
  # NOTE(review): only needed if one of the `run:` targets is itself a
  # Workflow; that cannot be determined from this file alone.
  SubworkflowFeatureRequirement: {}
inputs:
  # ----- names of the files to be written (plain strings, not Files) -----
  analysis_segment_cna_filename:
    type: string
    doc: "(ANALYSIS_SEGMENT_CNA_FILE; <project_id>.seg.cna.txt)"
  analysis_sv_filename:
    type: string
    doc: "(ANALYSIS_SV_FILE; <project_id>.svs.maf)"
  analysis_gene_cna_filename:
    type: string
    doc: "(ANALYSIS_GENE_CNA_FILENAME; <project_id>.gene.cna.txt)"
  analysis_mutations_filename:
    type: string
    doc: "(ANALYSIS_MUTATIONS_FILENAME; <project_id>.muts.maf)"
  analysis_mutations_share_filename:
    type: string
    doc: "<project_id>.muts.share.maf"

  # ----- input data files -----
  mutation_maf_files:
    type: File[]
    doc: "analysis_mutations_filename (ANALYSIS_MUTATIONS_FILENAME) cbio_mutation_data_filename (CBIO_MUTATION_DATA_FILENAME): (MAF_DIR)/*.muts.maf"
  facets_hisens_seg_files:
    type: File[]
    doc: "cbio_segment_data_filename (CBIO_SEGMENT_DATA_FILENAME; <project_id>_data_cna_hg19.seg) analysis_segment_cna_filename (ANALYSIS_SEGMENT_CNA_FILE; <project_id>.seg.cna.txt): (FACETS_DIR)/*_hisens.seg"
  facets_hisens_cncf_files:
    type: File[]
    doc: "cbio_cna_data_filename (CBIO_CNA_DATA_FILENAME; data_CNA.txt) analysis_gene_cna_filename (ANALYSIS_GENE_CNA_FILENAME; <project_id>.gene.cna.txt): (FACETS_DIR)/*_hisens.cncf.txt"
  mutation_svs_maf_files:
    type: File[]
    doc: "analysis_sv_filename (ANALYSIS_SV_FILE; <project_id>.svs.maf): (MAF_DIR)/*.svs.pass.vep.maf"
  # Handed unchanged to the generate_cna_data step (copy_number.cwl).
  targets_list:
    type: File

  # ----- labels and switches -----
  argos_version_string:
    type: string
    doc: "the version label of Roslin / Argos used to run the project analysis (ARGOS_VERSION_STRING)"
  # NOTE(review): the type is boolean, so a CWL runner only accepts true/false
  # here; the "'True' ... any other value" wording in `doc` appears to describe
  # the underlying IS_IMPACT variable rather than this input - confirm.
  is_impact:
    type: boolean
    default: true
    doc: "whether or not the project is an IMPACT project; should be the value 'True' if so, otherwise any other value means 'False' (IS_IMPACT)"
  helix_filter_version:
    type: string
    doc: "the version label of this helix filter repo (HELIX_FILTER_VERSION; git describe --all --long)"
  IMPACT_gene_list:
    type: File
    doc: "TSV file with gene labels and corresponding impact assays"
steps:
  # ---------------------------------------------------------------------------
  # <project_id>.gene.cna.txt (analysis_gene_cna_filename)
  # ---------------------------------------------------------------------------
  generate_cna_data:
    run: copy_number.cwl
    in:
      output_cna_filename: analysis_gene_cna_filename
      # The two names below are derived from output_cna_filename: the regex
      # strips the final ".ext" and the new suffix is appended.
      output_cna_ascna_filename:
        valueFrom: ${ return inputs.output_cna_filename.replace(/\.[^/.]+$/, "") + '.ascna.txt'; }
      output_cna_scna_filename:
        valueFrom: ${ return inputs.output_cna_filename.replace(/\.[^/.]+$/, "") + '.scna.txt'; }
      targets_list: targets_list
      hisens_cncfs: facets_hisens_cncf_files
    # Only output_cna_file is consumed downstream (by make_analysis_dir).
    out: [output_cna_file]

  # ---------------------------------------------------------------------------
  # <project_id>.muts.maf (analysis_mutations_filename)
  # ---------------------------------------------------------------------------
  # filter each maf file (one maf_filter.cwl run per entry of mutation_maf_files)
  muts_maf_filter:
    run: maf_filter.cwl
    scatter: maf_file
    in:
      maf_file: mutation_maf_files
      argos_version_string: argos_version_string
      is_impact: is_impact
      analysis_mutations_filename: analysis_mutations_filename  # <project_id>.muts.maf
    out: [analysis_mutations_file]

  # concat all the maf files into a single table
  concat_analysis_muts_maf:
    run: concat-tables.cwl
    in:
      # array of Files produced by the scattered step above
      input_files: muts_maf_filter/analysis_mutations_file
      output_filename: analysis_mutations_filename  # <project_id>.muts.maf
      # constant `true`; its effect is defined by concat-tables.cwl
      comments:
        valueFrom: ${ return true; }
    out: [output_file]

  # Need to add a version label to the maf file as per Nick's request
  add_maf_comment:
    run: concat_with_comments.cwl
    in:
      # `some_file` is only a carrier for the upstream File so that the
      # expression below can wrap it in a one-element array for `input_files`.
      some_file: concat_analysis_muts_maf/output_file
      input_files:
        valueFrom: ${ return [ inputs.some_file ]; }
      comment_value: helix_filter_version
      output_filename: analysis_mutations_filename  # <project_id>.muts.maf
    out: [output_file]

  # add the AF allele frequency column to the maf
  add_af:
    run: add_af.cwl
    in:
      input_file: add_maf_comment/output_file
      output_filename: analysis_mutations_filename
    out: [output_file]

  # label all the mutations that are in a gene covered by an IMPACT assay
  add_is_in_impact:
    run: add_is_in_impact.cwl
    in:
      input_file: add_af/output_file
      output_filename: analysis_mutations_filename
      IMPACT_file: IMPACT_gene_list
    out: [IMPACT_col_added_file]

  # create a version of the maf with fewer columns; shareable maf <project_id>.muts.share.maf
  filter_maf_cols:
    run: maf_col_filter.cwl
    in:
      input_file: add_is_in_impact/IMPACT_col_added_file
      output_filename: analysis_mutations_share_filename  # <project_id>.muts.share.maf
    out: [output_file]

  # ---------------------------------------------------------------------------
  # <project_id>.seg.cna.txt (analysis_segment_cna_filename)
  # ---------------------------------------------------------------------------
  # need to reduce the number of significant figures in the hisens_segs files
  # (one reduce_sig_figs.cwl run per entry of facets_hisens_seg_files)
  reduce_sig_figs_hisens_segs:
    run: reduce_sig_figs.cwl
    scatter: input_file
    in:
      input_file: facets_hisens_seg_files
    out: [output_file]

  # concatenate all of the hisens_segs files
  concat_hisens_segs:
    run: concat.cwl
    in:
      input_files: reduce_sig_figs_hisens_segs/output_file
    out: [output_file]

  # rename the output file
  rename_analysis_hisens_segs:
    run: cp.cwl
    in:
      input_file: concat_hisens_segs/output_file
      output_filename: analysis_segment_cna_filename  # <project_id>.seg.cna.txt
    out: [output_file]

  # ---------------------------------------------------------------------------
  # <project_id>.svs.maf (analysis_sv_filename)
  # (MAF_DIR)/*.svs.pass.vep.maf (mutation_svs_maf_files)
  # ---------------------------------------------------------------------------
  # No output_filename is passed here (unlike add_maf_comment); the result is
  # given its final name by rename_analysis_svs_maf below.
  generate_analysis_svs_maf:
    run: concat_with_comments.cwl
    in:
      input_files: mutation_svs_maf_files
      comment_value: helix_filter_version
    out: [output_file]

  rename_analysis_svs_maf:
    run: cp.cwl
    in:
      input_file: generate_analysis_svs_maf/output_file
      output_filename: analysis_sv_filename  # <project_id>.svs.maf
    out: [output_file]

  # ---------------------------------------------------------------------------
  # create the 'analysis' directory and put some files in it
  # ---------------------------------------------------------------------------
  make_analysis_dir:
    run: put_in_dir.cwl
    in:
      # The five File inputs below are referenced by the `files` expression,
      # which gathers them into a single array.
      gene_cna_file: generate_cna_data/output_cna_file  # <project_id>.gene.cna.txt
      muts_maf_file: add_is_in_impact/IMPACT_col_added_file  # <project_id>.muts.maf
      muts_share_maf_file: filter_maf_cols/output_file
      hisens_segs: rename_analysis_hisens_segs/output_file  # <project_id>.seg.cna.txt
      svs_maf_file: rename_analysis_svs_maf/output_file  # <project_id>.svs.maf
      output_directory_name:
        valueFrom: ${ return "analysis"; }
      # Multi-line plain scalar: YAML folds the line breaks into single
      # spaces, so the runner receives this expression as one line.
      files:
        valueFrom: ${ return [
          inputs.gene_cna_file,
          inputs.muts_maf_file,
          inputs.muts_share_maf_file,
          inputs.hisens_segs,
          inputs.svs_maf_file
          ]}
    out: [directory]
outputs:
  # The directory emitted by the make_analysis_dir step (named "analysis" via
  # that step's output_directory_name input).
  analysis_dir:
    type: Directory
    outputSource: make_analysis_dir/directory