/
picard_FastqToSam.xml
230 lines (177 loc) · 10.1 KB
/
picard_FastqToSam.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
<tool name="FastqToSam" id="picard_FastqToSam" version="1.126.0">
<!-- Short summary of what the tool does. -->
<description>convert Fastq data into unaligned BAM</description>

<!-- External package the command relies on; the version equals the tool version. -->
<requirements>
    <requirement version="1.126.0" type="package">picard</requirement>
</requirements>

<!-- Shared macro file. The @...@ tokens and the "VS" macro used by this tool are
     not defined in this file, so they are presumably provided here (confirm). -->
<macros>
    <import>picard_macros.xml</import>
</macros>
<command><![CDATA[
    ## Token from the imported macros (presumably JVM settings, confirm in the
    ## macro file), followed by the Picard jar and the FastqToSam sub-command.
    @java_options@
    java -jar \$JAVA_JAR_PATH/picard.jar
    FastqToSam

    ## Input layout. Single end and paired end both read the first FASTQ from
    ## the same parameter; paired end adds a second dataset. Any other selector
    ## value (the paired collection) uses the forward and reverse members.
    #set $layout = str( $input_type.input_type_selector )
    #if $layout in ( "se", "pe" ):
        FASTQ="${input_type.fastq}"
        #if $layout == "pe":
            FASTQ2="${input_type.fastq2}"
        #end if
    #else
        FASTQ="${input_type.fastq.forward}"
        FASTQ2="${input_type.fastq.reverse}"
    #end if

    ## Arguments that are always passed.
    QUALITY_FORMAT="${quality_format}"
    OUTPUT="${outFile}"
    READ_GROUP_NAME="${read_group_name}"
    SAMPLE_NAME="${sample_name}"

    ## Optional read group and header fields, passed only when non-empty.
    #if str( $library_name ):
        LIBRARY_NAME="${library_name}"
    #end if
    #if str( $platform_unit ):
        PLATFORM_UNIT="${platform_unit}"
    #end if
    #if str( $platform ):
        PLATFORM="${platform}"
    #end if
    #if str( $sequencing_center ):
        SEQUENCING_CENTER="${sequencing_center}"
    #end if
    #if str( $predicted_insert_size ):
        PREDICTED_INSERT_SIZE="${predicted_insert_size}"
    #end if
    #if str( $comment ):
        COMMENT="${comment}"
    #end if
    #if str( $description ):
        DESCRIPTION="${description}"
    #end if
    #if str( $run_date ):
        RUN_DATE="${run_date}"
    #end if

    ## Quality bounds and read name handling taken from the form.
    MIN_Q="${min_q}"
    MAX_Q="${max_q}"
    STRIP_UNPAIRED_MATE_NUMBER="${strip_unpairied_mate_number}"
    ALLOW_AND_IGNORE_EMPTY_LINES="${allow_and_ignore_empty_lines}"

    ## Fixed settings that are not exposed to the user, plus the stringency
    ## chosen through the expanded VS macro.
    SORT_ORDER=coordinate
    VALIDATION_STRINGENCY="${validation_stringency}"
    QUIET=true
    VERBOSITY=ERROR
]]></command>
<inputs>
    <!-- Input layout: the selected option decides which dataset parameters are shown
         and which branch of the command template is used. -->
    <conditional name="input_type">
        <param name="input_type_selector" type="select" label="What is your input data" help="Select between single end, paired end, and collections. See help below for full explanation of dataset types">
            <option value="se">Single end (single dataset)</option>
            <option value="pe">Paired end (two datasets)</option>
            <option value="pc">Paired collection</option>
        </param>
        <when value="se">
            <param name="fastq" type="data" format="fastq" label="Input fastq file for single end data" help="FASTQ"/>
        </when>
        <when value="pe">
            <param name="fastq" type="data" format="fastq" label="Input fastq file for the first read in paired end data" help="FASTQ"/>
            <param name="fastq2" type="data" format="fastq" label="Input fastq file for the second read of paired end data" help="FASTQ2"/>
        </when>
        <when value="pc">
            <param name="fastq" type="data_collection" collection_type="paired" label="FASTQ paired dataset collection" help="FASTQ and FASTQ2; A collection of two datasets with forward and reverse reads. See help below on explanation of dataset collections"/>
        </when>
    </conditional>

    <!-- Quality encoding; the option values are passed verbatim as QUALITY_FORMAT. -->
    <param name="quality_format" type="select" label="Select quality encoding scheme" help="QUALITY_FORMAT">
        <option value="Standard" selected="True">Sanger (+33)</option>
        <option value="Illumina">Illumina (+64)</option>
        <option value="Solexa">Solexa (+66)</option>
    </param>

    <!-- Read group fields that are always passed to the command. -->
    <param name="read_group_name" type="text" size="20" value="A" label="Read group name" help="READ_GROUP_NAME"/>
    <param name="sample_name" type="text" size="20" value="sample-a" label="Sample name" help="SAMPLE_NAME"/>

    <!-- Optional fields; the command template passes each one only when it is non-empty. -->
    <param name="library_name" type="text" size="20" optional="True" label="The library name" help="LIBRARY_NAME; Optional"/>
    <param name="platform_unit" type="text" size="20" optional="True" label="The platform unit (often run_barcode.lane)" help="PLATFORM_UNIT; Optional"/>
    <param name="platform" type="text" size="20" optional="True" label="The platform type (e.g. illumina, 454)" help="PLATFORM; Optional"/>
    <param name="sequencing_center" type="text" size="20" optional="True" label="The sequencing center from which the data originated" help="SEQUENCING_CENTER; Optional"/>
    <param name="predicted_insert_size" type="integer" min="0" max="100000" optional="True" label="Predicted median insert size, to insert into the read group header" help="PREDICTED_INSERT_SIZE; Optional"/>
    <param name="comment" type="text" size="20" optional="True" label="Comment to include in the output dataset's header" help="COMMENT; Optional"/>
    <param name="description" type="text" size="20" optional="True" label="Optional description information" help="DESCRIPTION; Optional"/>
    <param name="run_date" optional="True" type="text" label="Run date" help="RGDT; Optional; Format=YYYY-MM-DD (eg 1997-07-16)"/>

    <!-- Accepted quality range. The max_q label previously read "Minimum", duplicating
         the min_q label; it now matches its MAX_Q help text. -->
    <param name="min_q" type="integer" value="0" min="0" max="100" label="Minimum quality allowed in the input fastq" help="MIN_Q; An exception will be thrown if a quality is less than this value; default=0"/>
    <param name="max_q" type="integer" value="93" min="0" max="100" label="Maximum quality allowed in the input fastq" help="MAX_Q; An exception will be thrown if a quality is greater than this value; default=93"/>

    <!-- The parameter name "strip_unpairied_mate_number" is misspelled but is kept as-is:
         the command template and the tests refer to it by this exact name. -->
    <param name="strip_unpairied_mate_number" type="boolean" truevalue="true" falsevalue="false" label="If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end of a read name" help="STRIP_UNPAIRED_MATE_NUMBER; default=false"/>
    <param name="allow_and_ignore_empty_lines" type="boolean" truevalue="true" falsevalue="false" label="Allow (and ignore) empty lines" help="ALLOW_AND_IGNORE_EMPTY_LINES; default=false"/>

    <!-- Macro from the imported macro file; presumably defines the validation_stringency
         parameter that the command and tests use (confirm in the macro file). -->
    <expand macro="VS" />
</inputs>
<!-- Single BAM output; the command writes it through OUTPUT="${outFile}". -->
<outputs>
    <data name="outFile"
          format="bam"
          label="${tool.name} on ${on_string}: reads as unaligned BAM"/>
</outputs>
<tests>
    <!-- Paired end conversion of two FASTQ files with every optional field filled in. -->
    <test>
        <!-- Input layout and datasets -->
        <param name="input_type_selector" value="pe"/>
        <param name="fastq" value="picard_FastqToSam_read1.fq" ftype="fastq"/>
        <param name="fastq2" value="picard_FastqToSam_read2.fq" ftype="fastq"/>

        <!-- Encoding and read group fields -->
        <param name="quality_format" value="Standard"/>
        <param name="read_group_name" value="A"/>
        <param name="sample_name" value="sample-a"/>
        <param name="library_name" value="A"/>
        <param name="platform_unit" value="A"/>
        <param name="platform" value="Illumina"/>
        <param name="sequencing_center" value="A"/>
        <param name="predicted_insert_size" value="300"/>
        <param name="comment" value="A"/>
        <param name="description" value="A"/>
        <param name="run_date" value="2014-10-10"/>

        <!-- Quality bounds, flags and validation stringency -->
        <param name="min_q" value="0"/>
        <param name="max_q" value="93"/>
        <param name="strip_unpairied_mate_number" value="False"/>
        <param name="allow_and_ignore_empty_lines" value="False"/>
        <param name="validation_stringency" value="LENIENT"/>

        <!-- Expected BAM; up to 4 differing lines are tolerated. -->
        <output name="outFile" file="picard_FastqToSam_test1.bam" ftype="bam" lines_diff="4"/>
    </test>
</tests>
<!-- Exit codes from 1 upwards are reported as a fatal error. -->
<stdio>
    <exit_code level="fatal" range="1:"/>
</stdio>
<help>

.. class:: infomark

**Purpose**

Converts FASTQ data (a single dataset, two paired datasets, or a paired dataset collection) into an unaligned BAM dataset.

@dataset_collections@

@RG@

@description@

  FASTQ=File
  F1=File                       Input fastq file for single end data, or first read in paired end
                                data.  Required.

  FASTQ2=File
  F2=File                       Input fastq file for the second read of paired end data (if used).

  QUALITY_FORMAT=FastqQualityFormat
  V=FastqQualityFormat          A value describing how the quality values are encoded in the fastq.  Either Solexa for
                                pre-pipeline 1.3 style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above
                                (phred scaling + 64) or Standard for phred scaled scores with a character shift of 33.
                                If this value is not specified, the quality format will be detected automatically.
                                Default value: null. Possible values: {Solexa, Illumina, Standard}

  READ_GROUP_NAME=String
  RG=String                     Read group name  Default value: A.

  SAMPLE_NAME=String
  SM=String                     Sample name to insert into the read group header  Required.

  LIBRARY_NAME=String
  LB=String                     The library name to place into the LB attribute in the read group header.

  PLATFORM_UNIT=String
  PU=String                     The platform unit (often run_barcode.lane) to insert into the read group header.

  PLATFORM=String
  PL=String                     The platform type (e.g. illumina, solid) to insert into the read group header.

  SEQUENCING_CENTER=String
  CN=String                     The sequencing center from which the data originated.

  PREDICTED_INSERT_SIZE=Integer
  PI=Integer                    Predicted median insert size, to insert into the read group header.

  COMMENT=String
  CO=String                     Comment to include in the merged output file's header.

  DESCRIPTION=String
  DS=String                     Inserted into the read group header.

  RUN_DATE=Iso8601Date
  DT=Iso8601Date                Date the run was produced, to insert into the read group header.

  MIN_Q=Integer                 Minimum quality allowed in the input fastq.  An exception will be thrown if a quality is
                                less than this value.  Default value: 0.

  MAX_Q=Integer                 Maximum quality allowed in the input fastq.  An exception will be thrown if a quality is
                                greater than this value.  Default value: 93.

  STRIP_UNPAIRED_MATE_NUMBER=Boolean
                                If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end
                                of a read name.  Default value: false. Possible values: {true, false}

  ALLOW_AND_IGNORE_EMPTY_LINES=Boolean
                                Allow (and ignore) empty lines  Default value: false. Possible values: {true, false}

@more_info@

</help>
</tool>