# format-maf.cwl
# Prefix -> IRI map for the dct:, foaf: and doap: prefixed keys used in this
# document's metadata.
$namespaces:
  dct: http://purl.org/dc/terms/
  foaf: http://xmlns.com/foaf/0.1/
  doap: http://usefulinc.com/ns/doap#

# Schema documents for the three vocabularies listed under $namespaces.
$schemas:
  - http://dublincore.org/2012/06/14/dcterms.rdf
  - http://xmlns.com/foaf/spec/20140114.rdf
  - http://usefulinc.com/ns/doap#
# Release metadata: two doap:Version records, named module-3 and cwl-wrapper.
# Revisions are quoted so they are always read as strings.
doap:release:
  - class: doap:Version
    doap:name: module-3
    doap:revision: '1.0.0'
  - class: doap:Version
    doap:name: cwl-wrapper
    doap:revision: '1.0.0'

# Creator: one organization with a single member.
dct:creator:
  - class: foaf:Organization
    foaf:name: Memorial Sloan Kettering Cancer Center
    foaf:member:
      - class: foaf:Person
        foaf:name: Allan Bolipata
        foaf:mbox: mailto:bolipatc@mskcc.org

# Contributors: the same organization with three members.
dct:contributor:
  - class: foaf:Organization
    foaf:name: Memorial Sloan Kettering Cancer Center
    foaf:member:
      - class: foaf:Person
        foaf:name: Allan Bolipata
        foaf:mbox: mailto:bolipatc@mskcc.org
      - class: foaf:Person
        foaf:name: Nikhil Kumar
        foaf:mbox: mailto:kumarn1@mskcc.org
      - class: foaf:Person
        foaf:name: Christopher Harris
        foaf:mbox: mailto:harrisc2@mskcc.org
cwlVersion: v1.0

# Workflow that takes a single file (input_maf) and publishes the output of
# the portal_format_output step as portal_file.
class: Workflow
label: format-maf

requirements:
  # The next three are declared, but no step in this file scatters, merges
  # multiple sources into one input, or runs a nested Workflow.
  MultipleInputFeatureRequirement: {}
  ScatterFeatureRequirement: {}
  SubworkflowFeatureRequirement: {}
  # Used by the ${ ... } JavaScript bodies in the steps' valueFrom fields.
  InlineJavascriptRequirement: {}
  # Used by valueFrom on workflow step inputs (output_filename in each step).
  StepInputExpressionRequirement: {}

inputs:
  # File passed to the formatting_remove_comments step.
  input_maf:
    type: File

outputs:
  # The portal_formatted output of the portal_format_output step.
  portal_file:
    type: File
    outputSource: portal_format_output/portal_formatted
# Four steps chained through their stdout files:
#   formatting_remove_comments -> extract_columns -> add_two_columns
#   -> portal_format_output
# Each step derives its output file name from its input's basename with a
# JavaScript String.replace(); with a string pattern only the first occurrence
# is replaced, and a basename without the pattern is passed through unchanged.
steps:

  # grep "^[^#;]" <input_maf>: keeps only lines whose first character is
  # neither '#' nor ';' (empty lines do not match either, so they are dropped).
  formatting_remove_comments:
    in:
      input_maf: input_maf
      output_filename:
        valueFrom: ${ return inputs.input_maf.basename.replace(".maf", ".grepped.txt"); }
    out: [comment_removed]
    run:
      class: CommandLineTool
      baseCommand: ["grep", "^[^#;]"]
      stdout: $(inputs.output_filename)
      inputs:
        input_maf:
          type: File
          inputBinding:
            position: 1
        output_filename: string
      outputs:
        comment_removed:
          type: stdout

  # awk with a tab field separator. On line 1 it records each header name's
  # column index and prints a fixed seven-column header; on every later line
  # it prints those seven columns looked up by name.
  # NOTE(review): the \$( and \\t sequences in the program are presumably
  # escapes consumed by the CWL runner before awk sees the text (so that
  # awk's $(...) is not evaluated as a CWL expression) - confirm against the
  # runner in use before editing them.
  extract_columns:
    in:
      grepped_file: formatting_remove_comments/comment_removed
      output_filename:
        valueFrom: ${ return inputs.grepped_file.basename.replace(".grepped.txt", ".extracted.txt"); }
    out: [extracted_file]
    run:
      class: CommandLineTool
      # The command is built entirely from `arguments`; the input file is
      # bound at position 1.
      baseCommand: []
      arguments:
        - awk
        - -F
        - "\t"
        - 'NR==1 { for(i=1;i<=NF;i++){ f[$i]=i } print "Hugo_Symbol\\tEntrez_Gene_Id\\tCenter\\tTumor_Sample_Barcode\\tFusion\\tMethod\\tFrame" } NR>1 { print \$(f["Hugo_Symbol"])"\\t"\$(f["Entrez_Gene_Id"])"\\t"\$(f["Center"])"\\t"\$(f["Tumor_Sample_Barcode"])"\\t"\$(f["Fusion"])"\\t"\$(f["Method"])"\\t"\$(f["Frame"])}'
      stdout: $(inputs.output_filename)
      inputs:
        grepped_file:
          type: File
          inputBinding:
            position: 1
        output_filename: string
      outputs:
        extracted_file:
          type: stdout

  # sed appends two columns to every line: the header (line 1) gets
  # DNA_support and RNA_support, and every line from 2 to the end gets the
  # constant values "yes" (DNA_support) and "no" (RNA_support).
  add_two_columns:
    in:
      extracted_file: extract_columns/extracted_file
      output_filename:
        valueFrom: ${ return inputs.extracted_file.basename.replace(".extracted.txt", ".columns_added.txt"); }
    out: [columns_added]
    run:
      class: CommandLineTool
      baseCommand: ["sed", "1s/$/\\tDNA_support\\tRNA_support/;2,$s/$/\\tyes\\tno/"]
      stdout: $(inputs.output_filename)
      inputs:
        extracted_file:
          type: File
          inputBinding:
            position: 1
        output_filename: string
      outputs:
        columns_added:
          type: stdout

  # awk with a tab field separator. It records the header's column indexes on
  # line 1, then prints every line (header included) with the columns in the
  # order: Hugo_Symbol, Entrez_Gene_Id, Center, Tumor_Sample_Barcode, Fusion,
  # DNA_support, RNA_support, Method, Frame.
  # NOTE(review): same \$( and \\t escaping as in extract_columns - confirm
  # before editing.
  portal_format_output:
    in:
      sed_file: add_two_columns/columns_added
      output_filename:
        valueFrom: ${ return inputs.sed_file.basename.replace(".columns_added.txt", ".portal.txt"); }
    out: [portal_formatted]
    run:
      class: CommandLineTool
      # The command is built entirely from `arguments`; the input file is
      # bound at position 1.
      baseCommand: []
      arguments:
        - awk
        - -F
        - "\t"
        - 'NR==1 { for(i=1;i<=NF;i++){ f[$i]=i } } { print \$(f["Hugo_Symbol"])"\\t"\$(f["Entrez_Gene_Id"])"\\t"\$(f["Center"])"\\t"\$(f["Tumor_Sample_Barcode"])"\\t"\$(f["Fusion"])"\\t"\$(f["DNA_support"])"\\t"\$(f["RNA_support"])"\\t"\$(f["Method"])"\\t"\$(f["Frame"])}'
      stdout: $(inputs.output_filename)
      inputs:
        sed_file:
          type: File
          inputBinding:
            position: 1
        output_filename: string
      outputs:
        portal_formatted:
          type: stdout