Tribble/Tabix index path support #810

Merged
merged 9 commits into from Mar 10, 2017
@@ -23,7 +23,6 @@
*/
package htsjdk.tribble;
-import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.readers.*;
@@ -27,6 +27,7 @@
import htsjdk.tribble.util.TabixUtils;
import java.io.File;
+import java.nio.file.Path;
/**
* Common, tribble wide constants and static functions
@@ -37,49 +38,69 @@ private Tribble() { } // can't be instantiated
public final static String STANDARD_INDEX_EXTENSION = ".idx";
/**
- * Return the name of the index file for the provided vcf {@code filename}
+ * Return the name of the index file for the provided {@code filename}
* Does not actually create an index
- * @param filename name of the vcf file
+ * @param filename name of the file
* @return non-null String representing the index filename
*/
public static String indexFile(final String filename) {
return indexFile(filename, STANDARD_INDEX_EXTENSION);
}
/**
- * Return the File of the index file for the provided vcf {@code file}
+ * Return the File of the index file for the provided {@code file}
* Does not actually create an index
- * @param file the vcf file
+ * @param file the file
* @return a non-null File representing the index
*/
public static File indexFile(final File file) {
return indexFile(file.getAbsoluteFile(), STANDARD_INDEX_EXTENSION);
}
/**
- * Return the name of the tabix index file for the provided vcf {@code filename}
+ * Return the name of the index file for the provided {@code path}
* Does not actually create an index
- * @param filename name of the vcf file
+ * @param path the path
+ * @return Path representing the index filename
+ */
+ public static Path indexPath(final Path path) {
+ return path.getFileSystem().getPath(indexFile(path.toAbsolutePath().toString()));
+ }
+
+ /**
+ * Return the name of the tabix index file for the provided {@code filename}
+ * Does not actually create an index
+ * @param filename name of the file
* @return non-null String representing the index filename
*/
public static String tabixIndexFile(final String filename) {
return indexFile(filename, TabixUtils.STANDARD_INDEX_EXTENSION);
}
/**
- * Return the File of the tabix index file for the provided vcf {@code file}
+ * Return the File of the tabix index file for the provided {@code file}
* Does not actually create an index
- * @param file the vcf file
+ * @param file the file
* @return a non-null File representing the index
*/
public static File tabixIndexFile(final File file) {
return indexFile(file.getAbsoluteFile(), TabixUtils.STANDARD_INDEX_EXTENSION);
}
/**
- * Return the name of the index file for the provided vcf {@code filename} and {@code extension}
+ * Return the name of the tabix index file for the provided {@code path}
+ * Does not actually create an index
+ * @param path the path
+ * @return Path representing the index filename
+ */
+ public static Path tabixIndexPath(final Path path) {
+ return path.getFileSystem().getPath(tabixIndexFile(path.toAbsolutePath().toString()));
+ }
+
+ /**
+ * Return the name of the index file for the provided {@code filename} and {@code extension}
* Does not actually create an index
- * @param filename name of the vcf file
+ * @param filename name of the file
* @param extension the extension to use for the index
* @return non-null String representing the index filename
*/
@@ -88,9 +109,9 @@ private static String indexFile(final String filename, final String extension) {
}
/**
- * Return the File of the index file for the provided vcf {@code file} and {@code extension}
+ * Return the File of the index file for the provided {@code file} and {@code extension}
* Does not actually create an index
- * @param file the vcf file
+ * @param file the file
* @param extension the extension to use for the index
* @return a non-null File representing the index
*/
@@ -33,11 +33,9 @@
import htsjdk.tribble.util.ParsingUtils;
import java.io.BufferedInputStream;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
-import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.util.ArrayList;
@@ -18,15 +18,19 @@
package htsjdk.tribble.index;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.LittleEndianInputStream;
import htsjdk.tribble.util.LittleEndianOutputStream;
import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
@@ -67,11 +71,12 @@
private final static long NO_TS = -1L;
protected int version; // Our version value
- protected File indexedFile = null; // The file we've created this index for
+ protected Path indexedPath = null; // The file we've created this index for
@lbergelson

lbergelson Mar 8, 2017

Contributor

This is technically a breaking change. I'm going to say it's fine though. I don't believe there are any subclasses of AbstractIndex in the wild that we're going to be breaking.

@magicDGS

magicDGS Mar 9, 2017

Contributor

Oh, I hadn't realized that it breaks compatibility. If we do not want to break compatibility, I can keep the field with the deprecated annotation and set it when the constructor used is the File one. Let me know if I should do that.

@lbergelson

lbergelson Mar 9, 2017

Contributor

Yeah, it's tricky. Changing any field that's visible to a subclass of a non-final class is technically a breaking change, since someone could be relying on it if they implement a subclass. It's why I'm so strongly in favor of making classes final and of making variables private with accessors if they need to be accessed.

protected long indexedFileSize = NO_FILE_SIZE; // The size of the indexed file
protected long indexedFileTS = NO_TS; // The timestamp
protected String indexedFileMD5 = NO_MD5; // The MD5 value, generally not filled in (expensive to calc)
protected int flags;
+ protected final Log logger = Log.getInstance(this.getClass());
public boolean hasFileSize() {
return indexedFileSize != NO_FILE_SIZE;
@@ -116,8 +121,8 @@ public boolean equalsIgnoreProperties(final Object obj) {
return false;
}
- if (indexedFile != other.indexedFile && (indexedFile == null || !indexedFile.equals(other.indexedFile))) {
- System.err.printf("equals indexedFile: this %s != other %s%n", indexedFile, other.indexedFile);
+ if (indexedPath != other.indexedPath && (indexedPath == null || !indexedPath.equals(other.indexedPath))) {
+ System.err.printf("equals indexedPath: this %s != other %s%n", indexedPath, other.indexedPath);
return false;
}
@@ -159,18 +164,27 @@ public AbstractIndex() {
* @param featureFile the feature file to create an index from
*/
public AbstractIndex(final String featureFile) {
- this(new File(featureFile));
+ this();
+ try {
+ this.indexedPath = IOUtil.getPath(featureFile).toAbsolutePath();
+ } catch (IOException e) {
+ throw new IllegalArgumentException("IO error: " + e.getMessage(), e);
+ }
}
public AbstractIndex(final File featureFile) {
+ this(featureFile.toPath());
+ }
+
+ public AbstractIndex(final Path featurePath) {
this();
- this.indexedFile = featureFile;
+ this.indexedPath = featurePath.toAbsolutePath();
}
public AbstractIndex(final AbstractIndex parent) {
this();
this.version = parent.version;
- this.indexedFile = parent.indexedFile;
+ this.indexedPath = parent.indexedPath;
this.indexedFileSize = parent.indexedFileSize;
this.indexedFileTS = parent.indexedFileTS;
this.indexedFileMD5 = parent.indexedFileMD5;
@@ -200,8 +214,18 @@ public boolean isCurrentVersion() {
return version == VERSION;
}
+ /**
+ * Gets the indexed file.
+ * @throws UnsupportedOperationException if the path cannot be represented as a file.
+ * @deprecated on 03/2017. Use {@link #getIndexedPath()} instead.
+ */
+ @Deprecated
public File getIndexedFile() {
- return indexedFile;
+ return getIndexedPath().toFile();
+ }
+
+ public Path getIndexedPath() {
+ return indexedPath;
}
public long getIndexedFileSize() {
@@ -234,10 +258,14 @@ public boolean containsChromosome(final String chr) {
}
public void finalizeIndex() {
- // these two functions must be called now because the file may be being written during on the fly indexing
- if (indexedFile != null) {
- this.indexedFileSize = indexedFile.length();
- this.indexedFileTS = indexedFile.lastModified();
+ try {
+ // these two functions must be called now because the file may be being written during on the fly indexing
+ if (indexedPath != null) {
+ this.indexedFileSize = Files.size(indexedPath);
+ this.indexedFileTS = Files.getLastModifiedTime(indexedPath).toMillis();
+ }
+ } catch (IOException e) {
+ throw new RuntimeIOException(e);
}
}
@@ -251,7 +279,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException
dos.writeInt(MAGIC_NUMBER);
dos.writeInt(getType());
dos.writeInt(version);
- dos.writeString(indexedFile.getAbsolutePath());
+ dos.writeString(indexedPath.toUri().toString());
dos.writeLong(indexedFileSize);
dos.writeLong(indexedFileTS);
dos.writeString(indexedFileMD5);
@@ -274,7 +302,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException
private void readHeader(final LittleEndianInputStream dis) throws IOException {
version = dis.readInt();
- indexedFile = new File(dis.readString());
+ indexedPath = IOUtil.getPath(dis.readString());
indexedFileSize = dis.readLong();
indexedFileTS = dis.readLong();
indexedFileMD5 = dis.readString();
@@ -349,18 +377,22 @@ public void write(final LittleEndianOutputStream stream) throws IOException {
}
@Override
- public void write(final File idxFile) throws IOException {
- try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)))) {
+ public void write(final Path idxPath) throws IOException {
+ try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(Files.newOutputStream(idxPath)))) {
write(idxStream);
}
}
@Override
- public void writeBasedOnFeatureFile(final File featureFile) throws IOException {
- if (!featureFile.isFile()) return;
- write(Tribble.indexFile(featureFile));
+ public void writeBasedOnFeaturePath(final Path featurePath) throws IOException {
+ if (!Files.isRegularFile(featurePath)) {
+ logger.warn("Index not written into ", featurePath);
+ return;
+ }
+ write(Tribble.indexPath(featurePath));
}
+
public void read(final LittleEndianInputStream dis) throws IOException {
try {
readHeader(dis);
@@ -386,7 +418,7 @@ public void read(final LittleEndianInputStream dis) throws IOException {
}
protected void printIndexInfo() {
- System.out.println(String.format("Index for %s with %d indices", indexedFile, chrIndices.size()));
+ System.out.println(String.format("Index for %s with %d indices", indexedPath, chrIndices.size()));
final BlockStats stats = getBlockStats(true);
System.out.println(String.format(" total blocks %d", stats.total));
System.out.println(String.format(" total empty blocks %d", stats.empty));
@@ -31,6 +31,7 @@
import htsjdk.tribble.util.MathUtils;
import java.io.File;
+import java.nio.file.Path;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -56,13 +57,15 @@
MathUtils.RunningStat stats = new MathUtils.RunningStat();
long basesSeen = 0;
Feature lastFeature = null;
- File inputFile;
- public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
+ public DynamicIndexCreator(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) {
this.iba = iba;
// get a list of index creators
- this.inputFile = inputFile;
- creators = getIndexCreators(inputFile,iba);
+ creators = getIndexCreators(inputPath, iba);
+ }
+
+ public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
+ this(inputFile.toPath(), iba);
}
@Override
@@ -90,19 +93,19 @@ public Index finalizeIndex(final long finalFilePosition) {
/**
* create a list of index creators (initialized) representing the common index types we'd suspect they'd like to use
- * @param inputFile the input file to use to create the indexes
+ * @param inputPath the input path to use to create the indexes
* @return a map of index type to the best index for that balancing approach
*/
- private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
+ private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) {
final Map<IndexFactory.IndexType,TribbleIndexCreator> creators = new HashMap<IndexFactory.IndexType,TribbleIndexCreator>();
if (iba == IndexFactory.IndexBalanceApproach.FOR_SIZE) {
// add a linear index with the default bin size
- final LinearIndexCreator linearNormal = new LinearIndexCreator(inputFile, LinearIndexCreator.DEFAULT_BIN_WIDTH);
+ final LinearIndexCreator linearNormal = new LinearIndexCreator(inputPath, LinearIndexCreator.DEFAULT_BIN_WIDTH);
creators.put(IndexFactory.IndexType.LINEAR,linearNormal);
// create a tree index with the default size
- final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputFile, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
+ final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputPath, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeNormal);
}
@@ -111,12 +114,12 @@ public Index finalizeIndex(final long finalFilePosition) {
if (iba == IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) {
// create a linear index with a small bin size
final LinearIndexCreator linearSmallBin =
- new LinearIndexCreator(inputFile, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4));
+ new LinearIndexCreator(inputPath, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4));
creators.put(IndexFactory.IndexType.LINEAR,linearSmallBin);
// create a tree index with a small index size
final IntervalIndexCreator treeSmallBin =
- new IntervalIndexCreator(inputFile, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8));
+ new IntervalIndexCreator(inputPath, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8));
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeSmallBin);
}
Oops, something went wrong.