Skip to content

Commit

Permalink
Handle spaces in input strings in Tabix and SeekableStreamFactory. (#…
Browse files Browse the repository at this point in the history
…1487)

* Fix exceptions when trying to create a VCFFileReader on a VCF/.tbi when they're located in a path that contains spaces in the name (see broadinstitute/gatk#6664).
* The exception is caused by asymmetric assumptions about the encoding of the input string in different layers.  The fix is to decode the input String in SeekableStreamFactory before passing it on to the File constructor.
  • Loading branch information
cmnbroad committed Jul 8, 2020
1 parent 78941e4 commit a98d3a7
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 3 deletions.
Expand Up @@ -26,6 +26,7 @@
import htsjdk.samtools.util.IOUtil;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.nio.channels.SeekableByteChannel;
import java.util.function.Function;
Expand Down Expand Up @@ -96,7 +97,14 @@ public SeekableStream getStreamFor(final String path,
} else if (path.startsWith("ftp:")) {
return new SeekableFTPStream(new URL(path));
} else if (path.startsWith("file:")) {
return new SeekableFileStream(new File(new URL(path).getPath()));
try {
// convert to URI in order to obtain a decoded version of the path string suitable
// for use with the File constructor
final String decodedPath = new URI(path).getPath();
return new SeekableFileStream(new File(decodedPath));
} catch (java.net.URISyntaxException e) {
throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e);
}
} else if (IOUtil.hasScheme(path)) {
return new SeekablePathStream(IOUtil.getPath(path), wrapper);
} else {
Expand Down
@@ -1,20 +1,24 @@
package htsjdk.samtools.seekablestream;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;

public class SeekableStreamFactoryTest extends HtsjdkTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");

@Test
public void testIsFilePath() throws Exception {
public void testIsFilePath() {
Assert.assertEquals(SeekableStreamFactory.isFilePath("x"), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath(""), true);
Assert.assertEquals(SeekableStreamFactory.isFilePath("http://broadinstitute.org"), false);
Expand All @@ -23,7 +27,7 @@ public void testIsFilePath() throws Exception {
}

@DataProvider(name="getStreamForData")
public Object[][] getStreamForData() throws Exception {
public Object[][] getStreamForData() throws MalformedURLException {
return new Object[][] {
{ new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath(),
new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath() },
Expand All @@ -43,4 +47,29 @@ public void testGetStreamFor(final String path, final String expectedPath) throw
Assert.assertEquals(SeekableStreamFactory.getInstance().getStreamFor(path).getSource(), expectedPath);
}

@Test
public void testPathWithEmbeddedSpace() throws IOException {
final File testBam = new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam");

//create a temp dir with a space in the name and copy the test file there
final File tempDir = IOUtil.createTempDir("test spaces", "");
Assert.assertTrue(tempDir.getAbsolutePath().contains(" "));
tempDir.deleteOnExit();
final File inputBam = new File(tempDir, "index_test.bam");
inputBam.deleteOnExit();
IOUtil.copyFile(testBam, inputBam);

// make sure the input string we use is URL-encoded
final String inputString = Paths.get(inputBam.getAbsolutePath()).toUri().toString();
Assert.assertFalse(inputString.contains(" "));
Assert.assertTrue(inputString.contains("%20"));

try (final SeekableStream seekableStream =
SeekableStreamFactory.getInstance().getStreamFor(inputString)) {
final int BYTES_TO_READ = 10;
Assert.assertEquals(seekableStream.read(new byte[BYTES_TO_READ], 0,BYTES_TO_READ), BYTES_TO_READ);
}

}

}
29 changes: 29 additions & 0 deletions src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java
Expand Up @@ -3,6 +3,9 @@
import com.google.common.jimfs.Configuration;
import com.google.common.jimfs.Jimfs;
import htsjdk.HtsjdkTest;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.tribble.TestUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
Expand All @@ -12,6 +15,7 @@
import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -97,4 +101,29 @@ public void testCanOpenVCFPathReader(final String file, final String index, fina
// fail if a test that should have thrown didn't
Assert.assertTrue(shouldSucceed, "Test should have failed but succeeded");
}

@Test
public void testTabixFileWithEmbeddedSpaces() throws IOException {
final File testVCF = new File(TEST_DATA_DIR, "HiSeq.10000.vcf.bgz");
final File testTBI = new File(TEST_DATA_DIR, "HiSeq.10000.vcf.bgz.tbi");

// Copy the input files into a temporary directory with embedded spaces in the name.
// This test needs to include the associated .tbi file because we want to force execution
// of the tabix code path.
final File tempDir = IOUtil.createTempDir("test spaces", "");
Assert.assertTrue(tempDir.getAbsolutePath().contains(" "));
tempDir.deleteOnExit();
final File inputVCF = new File(tempDir, "HiSeq.10000.vcf.bgz");
inputVCF.deleteOnExit();
final File inputTBI = new File(tempDir, "HiSeq.10000.vcf.bgz.tbi");
inputTBI.deleteOnExit();
IOUtil.copyFile(testVCF, inputVCF);
IOUtil.copyFile(testTBI, inputTBI);

try (final VCFFileReader vcfFileReader = new VCFFileReader(inputVCF)) {
Assert.assertNotNull(vcfFileReader.getFileHeader());
}

}

}

0 comments on commit a98d3a7

Please sign in to comment.