Permalink
Browse files

FeatureCodec.getTabixFormat() to encapsulate tabix formatting (#669)

* added getTabixFormat method to FeatureCodec interface to index files with tabix when the format is defined in implementors

* changed as a default method in FeatureCodec

* added javadoc param description

* added tests and final getTabixFormat for binary codecs

* addressed comments
  • Loading branch information...
1 parent 8ef565a commit fba46371c71bf8ff7f3e2b56b97697de41a02f89 @magicDGS magicDGS committed with yfarjoun Aug 10, 2016
@@ -3,6 +3,7 @@
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.LocationAware;
import htsjdk.samtools.util.RuntimeIOException;
+import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.readers.PositionalBufferedStream;
import java.io.IOException;
@@ -40,4 +41,12 @@ public boolean isDone(final PositionalBufferedStream source) {
throw new RuntimeIOException("Failure reading from stream.", e);
}
}
+
+ /**
+ * Binary features cannot be tabix indexed, so this implementation always throws.
+ * It is marked {@code final} so that subclasses cannot override it.
+ *
+ * @return never returns normally
+ * @throws TribbleException always
+ */
+ @Override
+ public final TabixFormat getTabixFormat() {
+ throw new TribbleException("Binary codecs do not support tabix");
+ }
}
@@ -19,6 +19,7 @@
package htsjdk.tribble;
import htsjdk.samtools.util.LocationAware;
+import htsjdk.tribble.index.tabix.TabixFormat;
import java.io.IOException;
import java.io.InputStream;
@@ -119,4 +120,17 @@
* @return true if potentialInput can be parsed, false otherwise
*/
public boolean canDecode(final String path);
+
+ /**
+ * Define the tabix format for the feature, used for indexing. Default implementation throws an exception.
+ *
+ * Note that only {@link AsciiFeatureCodec} can read tabix files, as defined in
+ * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)}
+ *
+ * @return the format to use with tabix
+ * @throws TribbleException if the format is not defined
+ */
+ public default TabixFormat getTabixFormat() {
+ // The leading space keeps the codec's class name separate from the rest of the message.
+ throw new TribbleException(this.getClass().getSimpleName() + " does not have a defined tabix format");
+ }
}
@@ -25,6 +25,7 @@
import htsjdk.tribble.AsciiFeatureCodec;
import htsjdk.tribble.annotation.Strand;
+import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.util.ParsingUtils;
@@ -224,4 +225,8 @@ public int value() {
}
}
+ @Override
+ public TabixFormat getTabixFormat() {
+ return TabixFormat.BED; // features decoded by this codec are tabix indexed with the BED format
+ }
}
@@ -260,11 +260,25 @@ public static LinearIndex createLinearIndex(final File inputFile, final FeatureC
public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIndex(final File inputFile,
final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
final IndexType type) {
+ return createIndex(inputFile, codec, type, null); // null = no sequence dictionary; the four-argument overload treats it as optional
+ }
+
+ /**
+ * Create an index of the specified type with default binning parameters.
+ * Dispatches on {@code type} to the matching creation method of this class.
+ *
+ * @param inputFile the input file to load features from
+ * @param codec the codec to use for decoding records
+ * @param type the type of index to create
+ * @param sequenceDictionary May be null, but if present may reduce memory footprint for tabix index creation
+ * (only the TABIX branch passes it on)
+ * @return the index produced for the requested {@code type}
+ * @throws IllegalArgumentException if {@code type} is not one of the types handled by the switch
+ */
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final IndexType type,
+ final SAMSequenceDictionary sequenceDictionary) {
switch (type) {
case INTERVAL_TREE: return createIntervalIndex(inputFile, codec);
case LINEAR: return createLinearIndex(inputFile, codec);
- // Tabix index initialization requires additional information, so this construction method won't work.
- case TABIX: throw new UnsupportedOperationException("Tabix indices cannot be created through a generic interface");
+ case TABIX: return createTabixIndex(inputFile, codec, sequenceDictionary);
}
throw new IllegalArgumentException("Unrecognized IndexType " + type);
}
@@ -318,7 +332,18 @@ public static void writeIndex(final Index idx, final File idxFile) throws IOExce
return (TabixIndex)createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), indexCreator);
}
-
+ /**
+ * Create a tabix index for the input file, taking the tabix format from {@code codec.getTabixFormat()}
+ * and delegating to the overload that receives the format explicitly.
+ *
+ * @param inputFile The file to be indexed.
+ * @param codec the codec to use for decoding records; it also supplies the tabix format, and its
+ * {@code getTabixFormat()} may throw if the codec does not define one
+ * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features
+ * in inputFile must be in the order defined by sequenceDictionary, if it is present.
+ * @return the tabix index returned by the delegated overload
+ */
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> TabixIndex createTabixIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final SAMSequenceDictionary sequenceDictionary) {
+ return createTabixIndex(inputFile, codec, codec.getTabixFormat(), sequenceDictionary);
+ }
private static Index createIndex(final File inputFile, final FeatureIterator iterator, final IndexCreator creator) {
Feature lastFeature = null;
@@ -30,6 +30,7 @@
import htsjdk.tribble.Feature;
import htsjdk.tribble.NameAwareCodec;
import htsjdk.tribble.TribbleException;
+import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.util.ParsingUtils;
import htsjdk.variant.utils.GeneralUtils;
import htsjdk.variant.variantcontext.Allele;
@@ -782,4 +783,9 @@ protected void generateException(String message) {
protected static void generateException(String message, int lineNo) {
// Always throws; the message reports the VCF file as malformed at approximately lineNo, followed by the caller's message.
throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message));
}
+
+ @Override
+ public TabixFormat getTabixFormat() {
+ return TabixFormat.VCF; // features decoded by this codec are tabix indexed with the VCF format
+ }
}
@@ -54,4 +54,9 @@ public void testBinaryCodec(final File source, final FeatureCodec<Feature, LineI
originalReader.close();
binaryReader.close();
}
+
+ @Test(expectedExceptions = TribbleException.class)
+ public void testGetTabixFormatThrowsException() {
+ new ExampleBinaryCodec().getTabixFormat(); // expected to throw TribbleException (see the annotation above)
+ }
}
@@ -31,6 +31,7 @@
import htsjdk.tribble.bed.FullBEDFeature.Exon;
import htsjdk.tribble.index.IndexFactory;
import htsjdk.tribble.index.linear.LinearIndex;
+import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.util.LittleEndianOutputStream;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -219,6 +220,10 @@ private void createIndex(File testFile, File idxFile) throws IOException {
stream.close();
}
}
+ }
+ @Test
+ public void testGetTabixFormat() {
+ Assert.assertEquals(new BEDCodec().getTabixFormat(), TabixFormat.BED); // the BED codec must report the BED tabix format
}
}
@@ -1,6 +1,7 @@
package htsjdk.variant.vcf;
import htsjdk.tribble.TribbleException;
+import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
@@ -14,6 +15,7 @@
public class AbstractVCFCodecTest extends VariantBaseTest {
+
@Test
public void shouldPreserveSymbolicAlleleCase() {
VCFFileReader reader = new VCFFileReader(new File(VariantBaseTest.variantTestDataRoot + "breakpoint.vcf"), false);
@@ -50,4 +52,9 @@ public void testCanDecodeFile(String potentialInput, boolean canDecode) {
Assert.assertEquals(AbstractVCFCodec.canDecodeFile(potentialInput, VCFCodec.VCF4_MAGIC_HEADER), canDecode);
}
+ @Test
+ public void testGetTabixFormat() {
+ Assert.assertEquals(new VCFCodec().getTabixFormat(), TabixFormat.VCF); // VCFCodec must report the VCF tabix format
+ Assert.assertEquals(new VCF3Codec().getTabixFormat(), TabixFormat.VCF); // VCF3Codec must report the same format as VCFCodec
+ }
}

0 comments on commit fba4637

Please sign in to comment.