diff --git a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java index dfe1c9174..dbd0afc47 100644 --- a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java @@ -3,6 +3,7 @@ import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.LocationAware; import htsjdk.samtools.util.RuntimeIOException; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.PositionalBufferedStream; import java.io.IOException; @@ -40,4 +41,12 @@ public boolean isDone(final PositionalBufferedStream source) { throw new RuntimeIOException("Failure reading from stream.", e); } } + + /** + * Always throws a TribbleException; marked as final because binary features cannot be tabix indexed + */ + @Override + public final TabixFormat getTabixFormat() { + throw new TribbleException("Binary codecs do not support tabix"); + } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index b45d8cf8c..f14191a67 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -19,6 +19,7 @@ package htsjdk.tribble; import htsjdk.samtools.util.LocationAware; +import htsjdk.tribble.index.tabix.TabixFormat; import java.io.IOException; import java.io.InputStream; @@ -119,4 +120,17 @@ * @return true if potentialInput can be parsed, false otherwise */ public boolean canDecode(final String path); + + /** + * Define the tabix format for the feature, used for indexing. Default implementation throws an exception. 
+ * + * Note that only {@link AsciiFeatureCodec} can read tabix files as defined in + * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)} + * + * @return the format to use with tabix + * @throws TribbleException if the format is not defined + */ + public default TabixFormat getTabixFormat() { + throw new TribbleException(this.getClass().getSimpleName() + " does not have a defined tabix format"); + } } diff --git a/src/main/java/htsjdk/tribble/bed/BEDCodec.java b/src/main/java/htsjdk/tribble/bed/BEDCodec.java index 0e9185025..62d202c19 100644 --- a/src/main/java/htsjdk/tribble/bed/BEDCodec.java +++ b/src/main/java/htsjdk/tribble/bed/BEDCodec.java @@ -25,6 +25,7 @@ import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.annotation.Strand; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.util.ParsingUtils; @@ -224,4 +225,8 @@ public int value() { } } + @Override + public TabixFormat getTabixFormat() { + return TabixFormat.BED; + } } diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 85fbd72c7..a588220dc 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -260,11 +260,25 @@ public static LinearIndex createLinearIndex(final File inputFile, final FeatureC public static Index createIndex(final File inputFile, final FeatureCodec codec, final IndexType type) { + return createIndex(inputFile, codec, type, null); + } + + /** + * Create an index of the specified type with default binning parameters + * + * @param inputFile the input file to load features from + * @param codec the codec to use for decoding records + * @param type the type of index to create + * @param sequenceDictionary May be null, but if present may reduce memory footprint for tabix index creation + */ + public static Index createIndex(final File 
inputFile, + final FeatureCodec codec, + final IndexType type, + final SAMSequenceDictionary sequenceDictionary) { switch (type) { case INTERVAL_TREE: return createIntervalIndex(inputFile, codec); case LINEAR: return createLinearIndex(inputFile, codec); - // Tabix index initialization requires additional information, so this construction method won't work. - case TABIX: throw new UnsupportedOperationException("Tabix indices cannot be created through a generic interface"); + case TABIX: return createTabixIndex(inputFile, codec, sequenceDictionary); } throw new IllegalArgumentException("Unrecognized IndexType " + type); } @@ -318,7 +332,18 @@ public static void writeIndex(final Index idx, final File idxFile) throws IOExce return (TabixIndex)createIndex(inputFile, new FeatureIterator(inputFile, codec), indexCreator); } - + /** Create a tabix index using the format reported by codec.getTabixFormat() (the default FeatureCodec implementation throws TribbleException). + * @param inputFile The file to be indexed. + * @param codec the codec to use for decoding records + * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features + * in inputFile must be in the order defined by sequenceDictionary, if it is present. 
+ * @return the tabix index created for inputFile with the codec's tabix format + */ + public static TabixIndex createTabixIndex(final File inputFile, + final FeatureCodec codec, + final SAMSequenceDictionary sequenceDictionary) { + return createTabixIndex(inputFile, codec, codec.getTabixFormat(), sequenceDictionary); + } private static Index createIndex(final File inputFile, final FeatureIterator iterator, final IndexCreator creator) { Feature lastFeature = null; diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index 7b157ca7c..16857b4e6 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -30,6 +30,7 @@ import htsjdk.tribble.Feature; import htsjdk.tribble.NameAwareCodec; import htsjdk.tribble.TribbleException; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.utils.GeneralUtils; import htsjdk.variant.variantcontext.Allele; @@ -782,4 +783,9 @@ protected void generateException(String message) { protected static void generateException(String message, int lineNo) { throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message)); } + + @Override + public TabixFormat getTabixFormat() { + return TabixFormat.VCF; + } } diff --git a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java index 946609725..eff8939d8 100644 --- a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java +++ b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java @@ -54,4 +54,9 @@ public void testBinaryCodec(final File source, final FeatureCodec