Permalink
Browse files

Fix #653 (#672)

* added checking for AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS in TribbleIndexedFeatureReader

* fixed WFIterator's check for compressed files

* isGZIPPath deprecation

* added test for new functionality

* fixing URL encoding

* added non-remote (local) test file with spaces in its name
  • Loading branch information...
1 parent fba4637 commit c8202f23627bf6bc56cf28bb8f1ff8f6c0dd7a27 @magicDGS magicDGS committed with yfarjoun Aug 10, 2016
@@ -33,10 +33,12 @@
import htsjdk.tribble.util.ParsingUtils;
import java.io.BufferedInputStream;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -217,7 +219,7 @@ private void readHeader() throws IOException {
PositionalBufferedStream pbs = null;
try {
is = ParsingUtils.openInputStream(path);
- if (isGZIPPath(path)) {
+ if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) {
// TODO -- warning I don't think this can work, the buffered input stream screws up position
is = new GZIPInputStream(new BufferedInputStream(is));
}
@@ -273,7 +275,11 @@ private void readHeader() throws IOException {
return new WFIterator();
}
+ /**
+ * @deprecated use {@link #hasBlockCompressedExtension(String)} instead
+ */
//Visible for testing
+ @Deprecated
static boolean isGZIPPath(final String path) {
if (path.toLowerCase().endsWith(".gz")) {
return true;
@@ -310,7 +316,7 @@ public WFIterator() throws IOException {
final InputStream inputStream = ParsingUtils.openInputStream(path);
final PositionalBufferedStream pbs;
- if (isGZIPPath(path)) {
+ if (hasBlockCompressedExtension(path)) {
// Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls,
// and seekableStream does not support single byte reads
final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000));
@@ -64,7 +64,9 @@ public void testGZExtension(final String testString, final boolean expected) thr
public Object[][] createFeatureFileStrings() {
return new Object[][]{
{TestUtils.DATA_DIR + "test.vcf", 5},
- {TestUtils.DATA_DIR + "test.vcf.gz", 5}
+ {TestUtils.DATA_DIR + "test.vcf.gz", 5},
+ {TestUtils.DATA_DIR + "test.vcf.bgz", 5},
+ {TestUtils.DATA_DIR + "test with spaces.vcf", 5}
};
}
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
+20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
+20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
+20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
+20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
Binary file not shown.

0 comments on commit c8202f2

Please sign in to comment.