From c8202f23627bf6bc56cf28bb8f1ff8f6c0dd7a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 21:18:39 +0200 Subject: [PATCH] Fix #653 (#672) * added checking for AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS in TribbleIndexedFeatureReader * fixing WFIterator checking of compressed file * isGZIPPath deprecation * added test for new functionality * fixing URL encoding * added no-remote test file with spaces --- .../tribble/TribbleIndexedFeatureReader.java | 10 +++++++-- .../tribble/TribbleIndexFeatureReaderTest.java | 4 +++- .../resources/htsjdk/tribble/test with spaces.vcf | 24 +++++++++++++++++++++ src/test/resources/htsjdk/tribble/test.vcf.bgz | Bin 0 -> 849 bytes 4 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/htsjdk/tribble/test with spaces.vcf create mode 100644 src/test/resources/htsjdk/tribble/test.vcf.bgz diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index ae278f40a..514782d1e 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -33,10 +33,12 @@ import htsjdk.tribble.util.ParsingUtils; import java.io.BufferedInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -217,7 +219,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path); - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); } @@ -273,7 +275,11 @@ private void readHeader() throws IOException { return new WFIterator(); } + /** + * @deprecated use {@link #hasBlockCompressedExtension(String)} instead + */ //Visible for testing + @Deprecated static boolean isGZIPPath(final String path) { if (path.toLowerCase().endsWith(".gz")) { return true; @@ -310,7 +316,7 @@ public WFIterator() throws IOException { final InputStream inputStream = ParsingUtils.openInputStream(path); final PositionalBufferedStream pbs; - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(path)) { // Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls, // and seekableStream does not support single byte reads final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000)); diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 76bd41068..afdd827e6 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -64,7 +64,9 @@ public void testGZExtension(final String testString, final boolean expected) thr public Object[][] createFeatureFileStrings() { return new Object[][]{ {TestUtils.DATA_DIR + "test.vcf", 5}, - {TestUtils.DATA_DIR + "test.vcf.gz", 5} + {TestUtils.DATA_DIR + "test.vcf.gz", 5}, + {TestUtils.DATA_DIR + "test.vcf.bgz", 5}, + {TestUtils.DATA_DIR + "test with spaces.vcf", 5} }; } diff --git a/src/test/resources/htsjdk/tribble/test with spaces.vcf b/src/test/resources/htsjdk/tribble/test with spaces.vcf new file mode 100644 index 000000000..27d45004c --- /dev/null +++ b/src/test/resources/htsjdk/tribble/test with spaces.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/src/test/resources/htsjdk/tribble/test.vcf.bgz b/src/test/resources/htsjdk/tribble/test.vcf.bgz new file mode 100644 index 0000000000000000000000000000000000000000..44072dc94d11a640ed3c171ebe74ed1b43a75b55 GIT binary patch literal 849 zcmV-X1FrlZiwFb&00000{{{d;LjnLa1C5hiZ{j!X-=_xDMfsidrPY5ebx(GPckzudQ* zp)|@T2#wJQF}tVBQ@vAuexB#m$w-ry#j-9BwamZMR#dA*)vB;(8o3w_hg!W3-*)v7 zBZQ`^D08KkX;zxw7mp8f$_I(mMq0qGEKGXv|Coo?7iOwBn4`~$OWByaP-?C6ZFcq_ zD2Mr;`3WX`w@*frE41?x>W%jtkCKG*t=f{kgt}6@RB8WiR_3LnWtFNz-$5gPmbQqq z|MROgt&UQs#lf#+ZBm)po%teqX^U8F;Np~TRqbOKpDX3h3!@IIhN05CI<1-r{JZXm zQzp(v=2fP&^H*w)j*U;=eSMD$m!HAKrIDEvs>&R1@% zPd4*&j}Y_<*qGY3-a|B_GPSLtgY|lNxFXQC2-ySdx0}V%$yIYoAJYmt+f$nD9Zbfz ztQq-V)=XBNXMY`I^s;^q;ubNDKapc|(T)B1LA?6REx4+kOFMna=kC&4$^E;l^iDr} ztAhN8-o~GODC2Rqet8DVmkpSQVEruu!BY%Ye+EyWOBpl+2AvmJ1ol6V@a0J^Px^%* zfL-b#P-~8uOPB|yAOK_xmceEN78{>V!ljShNf7yHz$W3tWJbU=7SojomtwX8=ob?1 zNMOM*WEkLXIl3-~Sh!R$F@S^C&7%~70f?;&1@_kb>w!BDguRdav9OF0WXLXY^Da1p z6riE~X4fqlW5mb2?ZictG3zjeG2q@?FQoNBoe!lHq5B!CQwpiH=}E1`%`ukC06T(6fKZQ&pKVXp85E^L?C z^7C|8ms%QZIghp{`x^rr#f{atahkq4SrBfEv@b+pPmpdD$cFU~v{81FZ3X}UABzYC b000000RIL6LPG)o8vp|U0000000000n9!Tm literal 0 HcmV?d00001