new enum htsjdk.variant.variantcontext.StructuralVariantType describing the SV types #696

Merged
merged 2 commits into from Nov 1, 2016
Jump to file or symbol
Failed to load files and symbols.
+113 −1
Split
@@ -0,0 +1,47 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext;
+
+/**
+ * Type of Structural Variant as defined in the VCF spec 4.2
+ *
+ */
+public enum StructuralVariantType {
+ /** Deletion relative to the reference */
+ DEL,
+ /** Insertion of novel sequence relative to the reference */
+ INS,
+ /** Region of elevated copy number relative to the reference */
+ DUP,
+ /** Inversion of reference sequence */
+ INV,
+ /** Copy number variable region */
+ CNV,
+ /** breakend structural variation. VCF Specification : <cite>An arbitrary rearrangement
+ * event can be summarized as a set of novel adjacencies.
+ * Each adjacency ties together two breakends.</cite>
+ */
+ BND
+}
@@ -1732,4 +1732,13 @@ public int getAlleleIndex(final Allele allele) {
if ( index == -1 ) throw new IllegalArgumentException("Allele " + targetAllele + " not in this VariantContex " + this);
return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index);
}
+
+ /**
+ * Search for the INFO=SVTYPE and return the type of Structural Variant
+ * @return the StructuralVariantType of null if there is no property SVTYPE
+ * */
+ public StructuralVariantType getStructuralVariantType() {
+ final String svType = this.getAttributeAsString(VCFConstants.SVTYPE, null);
+ return svType == null ? null : StructuralVariantType.valueOf(svType);
+ }
}
@@ -63,7 +63,10 @@
public static final String SOMATIC_KEY = "SOMATIC";
public static final String VALIDATED_KEY = "VALIDATED";
public static final String THOUSAND_GENOMES_KEY = "1000G";
-
+
+ // reserved INFO for structural variants
+ /** INFO Type of structural variant */
+ public static final String SVTYPE = "SVTYPE";
// separators
public static final String FORMAT_FIELD_SEPARATOR = ":";
@@ -26,6 +26,9 @@
package htsjdk.variant.variantcontext;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.CloserUtil;
+
// the imports for unit testing.
import htsjdk.samtools.util.TestUtil;
@@ -37,6 +40,8 @@
import htsjdk.tribble.TribbleException;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFFileReader;
+
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite;
@@ -1447,4 +1452,30 @@ public void testExtraStrictValidationFailure(final VariantContext vc, final Alle
// extraStrictValidation throws exceptions if it fails, so no Asserts here...
vc.extraStrictValidation(reportedReference, observedReference, rsIDs);
}
+
+
+ @DataProvider(name = "structuralVariationsTestData")
+ public Object[][] getStructuralVariationsTestData() {
+ return new Object[][] {
+ {new File("src/test/resources/htsjdk/variant/structuralvariants.vcf")}
+ };
+ }
+
+ @Test(dataProvider = "structuralVariationsTestData")
+ public void testExtractStructuralVariationsData(final File vcfFile) {
+ VCFFileReader reader = null;
+ CloseableIterator<VariantContext> iter = null;
+ try {
+ reader = new VCFFileReader(vcfFile , false );
+ iter = reader.iterator();
+ while(iter.hasNext()) {
+ final VariantContext ctx = iter.next();
+ final StructuralVariantType st = ctx.getStructuralVariantType();
+ Assert.assertNotNull(st);
+ }
+ } finally {
+ CloserUtil.close(iter);
+ CloserUtil.close(reader);
+ }
+ }
}
@@ -0,0 +1,22 @@
+##fileformat=VCFv4.2
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=END_CHR,Number=A,Type=String,Description="End chromosome of the variant described in this record">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=A,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRAND_1,Number=1,Type=String,Description="Strand Orientation of SV Start">
+##INFO=<ID=STRAND_2,Number=1,Type=String,Description="Strand Orientation of SV End">
+##INFO=<ID=METHOD,Number=1,Type=String,Description="SV Caller used to predict">
+##INFO=<ID=DP,Number=1,Type=String,Description="combined depth across samples">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=AO,Number=1,Type=Integer,Description="Alternate Allele Observations">
+##contig=<ID=1,length=14640000>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+1 20 . N <DUP> 1 . IMPRECISE;SVTYPE=DUP;END=4641652;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=4641632;METHOD=LUMPY;DP=90 GT:AO 1/1:90
+1 33 . N <DUP> 1 . IMPRECISE;SVTYPE=DUP;END=2640388;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=2640355;METHOD=LUMPY;DP=3 GT:AO 1/1:3
+1 70 . N <DEL> 1 . IMPRECISE;SVTYPE=DEL;END=4641583;END_CHR=1;STRAND_1=+;STRAND_2=-;SVLEN=-4641513;METHOD=LUMPY;DP=1 GT:AO 1/1:1
+1 101 . N <INV> 1 . IMPRECISE;SVTYPE=INV;END=1988714;END_CHR=1;STRAND_1=-;STRAND_2=-;SVLEN=1988613;METHOD=LUMPY;DP=2 GT:AO 1/1:2