/
picard_SamToFastq.xml
200 lines (151 loc) · 10.9 KB
/
picard_SamToFastq.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
<tool name="SamToFastq" id="picard_SamToFastq" version="1.126.0">
<!-- One-line summary of what the tool does. -->
<description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>

<!-- Runtime dependency: the picard package, pinned to the same version as the tool itself. -->
<requirements>
    <requirement type="package" version="1.126.0">picard</requirement>
</requirements>

<!--
    Shared definitions are imported from picard_macros.xml.
    NOTE(review): the @...@ tokens and the "VS" macro used elsewhere in this file
    are not defined here, so they presumably come from that import; confirm.
-->
<macros>
    <import>picard_macros.xml</import>
</macros>
<command><![CDATA[
    ## The template is wrapped in CDATA so that shell operators such as the double
    ## ampersand and the greater-than sign can be written literally while the file
    ## stays well-formed XML.

    ## Writing a line to the hidden report dataset is necessary for output dataset
    ## detection (see the outputs section of this tool).
    echo "BAM" > "${report}" &&

    @java_options@

    java -jar \$JAVA_JAR_PATH/picard.jar
    SamToFastq

    INPUT="${inputFile}"

    ## Output layout: exactly one of the three branches below is emitted.
    #if str( $output_per_rg ) == "true":
        ## One fastq (or one pair) per read group, written into the working directory.
        OUTPUT_PER_RG=true
        OUTPUT_DIR=.
    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
        ## Separate files for first ends, second ends and unpaired reads.
        FASTQ=READ1.fastq
        SECOND_END_FASTQ=READ2.fastq
        UNPAIRED_FASTQ=UNPAIRED_READS.fastq
    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
        ## A single interleaved file.
        FASTQ=INTERLEAVED.fastq
    #end if

    RE_REVERSE="${re_reverse}"
    INTERLEAVE="${interleave}"
    INCLUDE_NON_PF_READS="${include_non_pf_reads}"
    CLIPPING_ATTRIBUTE="${clipping_attribute}"
    CLIPPING_ACTION="${clipping_action}"

    ## In the form, -1 stands in for "null" on the max-bases options, so the
    ## corresponding option is only passed when a non-negative value was given.
    READ1_TRIM="${read1_trim}"
    #if int($read1_max_bases_to_write) > -1:
        READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
    #end if

    READ2_TRIM="${read2_trim}"
    #if int($read2_max_bases_to_write) > -1:
        READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
    #end if

    INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"

    VALIDATION_STRINGENCY="${validation_stringency}"
    QUIET=true
    VERBOSITY=ERROR
]]></command>
<inputs>
    <!-- Source alignment dataset (SAM or BAM). -->
    <param name="inputFile"
           type="data"
           format="sam,bam"
           label="Select SAM/BAM dataset or dataset collection"
           help="If empty, upload or import a SAM/BAM dataset"/>

    <!-- Output layout switch: selects the per-read-group branch of the command template. -->
    <param name="output_per_rg"
           type="boolean"
           checked="False"
           label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)"
           help="OUTPUT_PER_RG; default=False"/>

    <!-- Boolean options whose values are interpolated into the Picard command line. -->
    <param name="re_reverse"
           type="boolean"
           checked="True"
           label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq"
           help="RE_REVERSE; default=True"/>
    <param name="interleave"
           type="boolean"
           label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from"
           help="INTERLEAVE; default=False"/>
    <param name="include_non_pf_reads"
           type="boolean"
           label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ"
           help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>

    <!-- Clipping controls; the literal text "null" is the default value of both fields. -->
    <param name="clipping_attribute"
           type="text"
           size="4"
           value="null"
           label="The attribute that stores the position at which the SAM/BAM record should be clipped"
           help="CLIPPING_ATTRIBUTE; default=null"/>
    <param name="clipping_action"
           type="text"
           size="10"
           value="null"
           label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region"
           help="CLIPPING_ACTION; default=null"/>

    <!-- Per-read trimming; for the max-bases fields, -1 is the form's stand-in for null. -->
    <param name="read1_trim"
           type="integer"
           value="0"
           min="0"
           label="The number of bases to trim from the beginning of read 1"
           help="READ1_TRIM; default=0"/>
    <param name="read1_max_bases_to_write"
           type="integer"
           value="-1"
           label="The maximum number of bases to write from read 1 after trimming"
           help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
    <param name="read2_trim"
           type="integer"
           value="0"
           min="0"
           label="The number of bases to trim from the beginning of read 2"
           help="READ2_TRIM; default=0"/>
    <param name="read2_max_bases_to_write"
           type="integer"
           value="-1"
           label="The maximum number of bases to write from read 2 after trimming"
           help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>

    <param name="include_non_primary_alignments"
           type="boolean"
           label="If true, include non-primary alignments in the output"
           help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>

    <!-- NOTE(review): "VS" is not defined in this file; it presumably supplies the
         validation_stringency parameter read by the command template. Confirm in the macros file. -->
    <expand macro="VS"/>
</inputs>
<outputs>
    <!--
        The only declared output is a hidden text report written by the command.
        The fastq files are attached to it through dataset discovery: every file whose
        name ends in .fastq becomes a visible fastqsanger dataset, named after the
        part of the file name that precedes the extension.
        Discovery relies on the fact that, with OUTPUT_PER_RG=true, this tool
        automatically adds the .fastq extension to the files it emits.
        The angle brackets of the named group are written as entities because a
        literal less-than sign is not allowed inside an XML attribute value.
    -->
    <data format="txt" name="report" label="SamToFastq run" hidden="true">
        <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
    </data>
</outputs>
<tests>
    <!-- Interleaved, non-per-read-group run: a single INTERLEAVED fastq is expected to be discovered. -->
    <test>
        <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>

        <!-- Output layout -->
        <param name="output_per_rg" value="false"/>
        <param name="re_reverse" value="true"/>
        <param name="interleave" value="true"/>
        <param name="include_non_pf_reads" value="false"/>

        <!-- Clipping and trimming, all set to the same values as the form defaults -->
        <param name="clipping_attribute" value="null"/>
        <param name="clipping_action" value="null"/>
        <param name="read1_trim" value="0"/>
        <param name="read1_max_bases_to_write" value="-1"/>
        <param name="read2_trim" value="0"/>
        <param name="read2_max_bases_to_write" value="-1"/>

        <param name="include_non_primary_alignments" value="false"/>

        <output name="report">
            <!-- The report must contain the line written by the echo at the start of the command. -->
            <assert_contents>
                <has_line line="BAM"/>
            </assert_contents>
            <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
        </output>
    </test>
</tests>
<!-- Any exit code of 1 or above is reported at the "fatal" level. -->
<stdio>
    <exit_code range="1:" level="fatal"/>
</stdio>
<help>
**Purpose**
Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
-----
.. class:: warningmark
**DANGER: Multiple Outputs**
Generating per-read-group fastq files (setting **OUTPUT_PER_RG** to True) may produce a very large number of outputs. Know what you are doing!
@dataset_collections@
@description@
FASTQ=File
F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
SECOND_END_FASTQ=File
F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null.
Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
UNPAIRED_FASTQ=File
FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode. Default
value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
OUTPUT_PER_RG=Boolean
OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is
paired). Default value: false. Possible values: {true, false} Cannot be used in
conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
OUTPUT_DIR=File
ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
Default value: null.
RE_REVERSE=Boolean
RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them
to fastq Default value: true. Possible values: {true, false}
INTERLEAVE=Boolean
INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
which end it came from Default value: false. Possible values: {true, false}
INCLUDE_NON_PF_READS=Boolean
NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
Default value: false. Possible values: {true, false}
CLIPPING_ATTRIBUTE=String
CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped Default
value: null.
CLIPPING_ACTION=String
CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities
should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
the clipped region; and any integer means that the base qualities should be set to that
value in the clipped region. Default value: null.
READ1_TRIM=Integer
R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0.
READ1_MAX_BASES_TO_WRITE=Integer
R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than
this many bases left after trimming, all will be written. If this value is null then all
bases left after trimming will be written. Default value: null.
READ2_TRIM=Integer
R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0.
READ2_MAX_BASES_TO_WRITE=Integer
R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than
this many bases left after trimming, all will be written. If this value is null then all
bases left after trimming will be written. Default value: null.
INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
If true, include non-primary alignments in the output. Support of non-primary alignments
in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
there are paired reads with non-primary alignments. Default value: false.
Possible values: {true, false}
@more_info@
</help>
</tool>