diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index ae278f40a..514782d1e 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -33,10 +33,12 @@ import htsjdk.tribble.util.ParsingUtils; import java.io.BufferedInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -217,7 +219,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path); - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); } @@ -273,7 +275,11 @@ private void readHeader() throws IOException { return new WFIterator(); } + /** + * @deprecated use {@link #hasBlockCompressedExtension(String)} instead + */ //Visible for testing + @Deprecated static boolean isGZIPPath(final String path) { if (path.toLowerCase().endsWith(".gz")) { return true; @@ -310,7 +316,7 @@ public WFIterator() throws IOException { final InputStream inputStream = ParsingUtils.openInputStream(path); final PositionalBufferedStream pbs; - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(path)) { // Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls, // and seekableStream does not support single byte reads final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000)); diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 76bd41068..afdd827e6 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -64,7 +64,9 @@ public void testGZExtension(final String testString, final boolean expected) thr public Object[][] createFeatureFileStrings() { return new Object[][]{ {TestUtils.DATA_DIR + "test.vcf", 5}, - {TestUtils.DATA_DIR + "test.vcf.gz", 5} + {TestUtils.DATA_DIR + "test.vcf.gz", 5}, + {TestUtils.DATA_DIR + "test.vcf.bgz", 5}, + {TestUtils.DATA_DIR + "test with spaces.vcf", 5} }; } diff --git a/src/test/resources/htsjdk/tribble/test with spaces.vcf b/src/test/resources/htsjdk/tribble/test with spaces.vcf new file mode 100644 index 000000000..27d45004c --- /dev/null +++ b/src/test/resources/htsjdk/tribble/test with spaces.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/src/test/resources/htsjdk/tribble/test.vcf.bgz b/src/test/resources/htsjdk/tribble/test.vcf.bgz new file mode 100644 index 000000000..44072dc94 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/test.vcf.bgz differ