Permalink
Browse files

added unit tests, tiny clean up for VCFStandardHeaderLines

  • Loading branch information...
1 parent ee05a20 commit 9ceb00fb6f19de66afb15f0ee1082653972db943 @SHuang-Broad SHuang-Broad committed Jul 10, 2016
@@ -37,28 +37,27 @@
import java.util.Set;
/**
- * Manages header lines for standard VCF INFO and FORMAT fields.
+ * Manages header lines for standard VCF {@code INFO} and {@code FORMAT} fields.
*
- * Provides simple mechanisms for registering standard lines,
- * looking them up, and adding them to headers.
+ * Provides simple mechanisms for
+ * 1) registering standard lines,
+ * 2) looking them up, and
+ * 3) adding them to headers.
*
* @author Mark DePristo
* @since 6/12
*/
public class VCFStandardHeaderLines {
/**
- * Enabling this causes us to repair header lines even if only their descriptions differ
+ * Enabling this causes us to repair header lines even if only their descriptions differ.
*/
private final static boolean REPAIR_BAD_DESCRIPTIONS = false;
// Registry of the standard FORMAT header lines, keyed by ID; filled in by the static initializer below.
private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
// Registry of the standard INFO header lines, keyed by ID; filled in by the static initializer below.
private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();
/**
* Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly
- * allocated VCFHeader with standard VCF header lines repaired as necessary
- *
- * @param header
- * @return
+ * allocated {@link VCFHeader} with standard VCF header lines repaired as necessary.
*/
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());
@@ -77,127 +76,101 @@ public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
/**
* Adds header lines for each of the format fields in IDs to header, returning the set of
- * IDs without standard descriptions, unless throwErrorForMissing is true, in which
- * case this situation results in a TribbleException
- *
- * @param IDs
- * @return
+ * {@code IDs} without standard descriptions, unless {@code throwErrorForMissing} is true, in which
+ * case this situation results in a {@link TribbleException}.
*/
public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
    // Delegate to the FORMAT registry, which hands back the IDs it has no standard line for.
    final Set<String> idsWithoutStandardLine = formatStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
    return idsWithoutStandardLine;
}
/**
* @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection)
- *
- * @param headerLines
- * @param throwErrorForMissing
- * @param IDs
- * @return
*/
public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
    // Wrap the varargs array as a collection so the Collection-based overload does the work.
    final Collection<String> idList = Arrays.asList(IDs);
    return addStandardFormatLines(headerLines, throwErrorForMissing, idList);
}
/**
- * Returns the standard format line for ID. If none exists, return null or throw an exception, depending
- * on throwErrorForMissing
- *
- * @param ID
- * @param throwErrorForMissing
- * @return
+ * Returns the standard format line for {@code ID}.
+ * If none exists, return null or throw an exception, depending on {@code throwErrorForMissing}.
*/
public static VCFFormatHeaderLine getFormatLine(final String ID, final boolean throwErrorForMissing) {
    // Look the ID up in the FORMAT registry; a miss is either null or an exception, per the flag.
    final VCFFormatHeaderLine standardLine = formatStandards.get(ID, throwErrorForMissing);
    return standardLine;
}
/**
- * Returns the standard format line for ID. If none exists throw an exception
- *
- * @param ID
- * @return
+ * Returns the standard format line for {@code ID}.
+ * If none exists, throw a {@link TribbleException}.
*/
public static VCFFormatHeaderLine getFormatLine(final String ID) {
    // Strict lookup: reuse the two-argument overload with throwErrorForMissing forced to true.
    return getFormatLine(ID, true);
}
- private static void registerStandard(final VCFFormatHeaderLine line) {
- formatStandards.add(line);
- }
-
/**
- * Adds header lines for each of the info fields in IDs to header, returning the set of
- * IDs without standard descriptions, unless throwErrorForMissing is true, in which
- * case this situation results in a TribbleException
- *
- * @param IDs
- * @return
+ * Adds header lines for each of the info fields in {@code IDs} to header, returning the set of
+ * IDs without standard descriptions, unless {@code throwErrorForMissing} is true, in which
+ * case this situation results in a {@link TribbleException}.
*/
public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
    // Delegate to the INFO registry, which hands back the IDs it has no standard line for.
    final Set<String> idsWithoutStandardLine = infoStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
    return idsWithoutStandardLine;
}
/**
* @see #addStandardInfoLines(java.util.Set, boolean, java.util.Collection)
- *
- * @param IDs
- * @return
*/
public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
    // Wrap the varargs array as a collection so the Collection-based overload does the work.
    final Collection<String> idList = Arrays.asList(IDs);
    return addStandardInfoLines(headerLines, throwErrorForMissing, idList);
}
/**
- * Returns the standard info line for ID. If none exists, return null or throw an exception, depending
- * on throwErrorForMissing
- *
- * @param ID
- * @param throwErrorForMissing
- * @return
+ * Returns the standard info line for {@code ID}.
+ * If none exists, return {@code null} or throw a {@link TribbleException}, depending on {@code throwErrorForMissing}.
*/
public static VCFInfoHeaderLine getInfoLine(final String ID, final boolean throwErrorForMissing) {
    // Look the ID up in the INFO registry; a miss is either null or an exception, per the flag.
    final VCFInfoHeaderLine standardLine = infoStandards.get(ID, throwErrorForMissing);
    return standardLine;
}
/**
- * Returns the standard info line for ID. If none exists throw an exception
- *
- * @param ID
- * @return
+ * Returns the standard info line for {@code ID}.
+ * If none exists, throw a {@link TribbleException}.
*/
public static VCFInfoHeaderLine getInfoLine(final String ID) {
    // Strict lookup: a missing standard line is reported by exception rather than by null.
    final boolean throwErrorForMissing = true;
    return getInfoLine(ID, throwErrorForMissing);
}
+
private static void registerStandard(final VCFInfoHeaderLine line) {
    // Record the line in the INFO registry of this class.
    VCFStandardHeaderLines.infoStandards.add(line);
}
+ private static void registerStandard(final VCFFormatHeaderLine line) {
+ formatStandards.add(line);
+ }
//
// VCF header line constants
//
static {
// FORMAT lines
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
// INFO lines
- registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality"));
- registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality"));
+ registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
}
private static class Standards<T extends VCFCompoundHeaderLine> {
@@ -207,10 +180,10 @@ public T repair(final T line) {
final T standard = get(line.getID(), false);
if ( standard != null ) {
final boolean badCountType = line.getCountType() != standard.getCountType();
- final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount();
- final boolean badType = line.getType() != standard.getType();
- final boolean badDesc = ! line.getDescription().equals(standard.getDescription());
- final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
+ final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount();
+ final boolean badType = line.getType() != standard.getType();
+ final boolean badDesc = ! line.getDescription().equals(standard.getDescription());
+ final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
if ( needsRepair ) {
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
@@ -221,10 +194,12 @@ public T repair(final T line) {
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
}
return standard;
- } else
+ } else {
return line;
- } else
+ }
+ } else {
return line;
+ }
}
public Set<String> addToHeader(final Set<VCFHeaderLine> headerLines, final Collection<String> IDs, final boolean throwErrorForMissing) {
@@ -241,15 +216,17 @@ public T repair(final T line) {
}
public void add(final T line) {
- if ( standards.containsKey(line.getID()) )
+ if ( standards.containsKey(line.getID()) ) {
throw new TribbleException("Attempting to add multiple standard header lines for ID " + line.getID());
+ }
standards.put(line.getID(), line);
}
public T get(final String ID, final boolean throwErrorForMissing) {
final T x = standards.get(ID);
- if ( throwErrorForMissing && x == null )
+ if ( throwErrorForMissing && x == null ) {
throw new TribbleException("Couldn't find a standard VCF header line for field " + ID);
+ }
return x;
}
}
@@ -53,13 +53,19 @@
tests.add(new Object[]{"DP", "info", true});
tests.add(new Object[]{"DB", "info", true});
tests.add(new Object[]{"END", "info", true});
+ tests.add(new Object[]{"SB", "info", true});
+ tests.add(new Object[]{"MQ", "info", true});
+ tests.add(new Object[]{"MQ0", "info", true});
+ tests.add(new Object[]{"SOMATIC", "info", true});
// format
tests.add(new Object[]{"GT", "format", true});
tests.add(new Object[]{"GQ", "format", true});
tests.add(new Object[]{"DP", "format", true});
tests.add(new Object[]{"AD", "format", true});
tests.add(new Object[]{"PL", "format", true});
+ tests.add(new Object[]{"FT", "format", true});
+ tests.add(new Object[]{"PQ", "format", true});
tests.add(new Object[]{"NOT_STANDARD", "info", false});
tests.add(new Object[]{"NOT_STANDARD", "format", false});
@@ -81,8 +87,51 @@ else if ( type.equals("format") )
if ( expectedToBeStandard ) {
Assert.assertNotNull(line);
Assert.assertEquals(line.getID(), key);
- } else
+ Assert.assertTrue(deeperTest(line));
+ } else {
Assert.assertNull(line);
+ }
+ }
+
+ /**
+  * Checks that a standard header line has the type and the count (or count type) that
+  * {@code VCFStandardHeaderLines} registers for its ID.
+  *
+  * @param line a standard INFO or FORMAT header line
+  * @return {@code true} if the line's type and count match the values expected for its ID
+  * @throws IllegalArgumentException if the line's ID is not one of the IDs handled here
+  */
+ private boolean deeperTest(final VCFCompoundHeaderLine line) {
+     final String id = line.getID();
+
+     // FORMAT lines
+     if (id.equals(VCFConstants.GENOTYPE_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.String) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.DEPTH_KEY)) {
+         // DP is registered both as a FORMAT and as an INFO line, with the same type and count,
+         // so this single branch covers both (a second DEPTH_KEY branch would be unreachable).
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.GENOTYPE_PL_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.G);
+     } else if (id.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.R);
+     } else if (id.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.String) && line.getCountType().equals(VCFHeaderLineCount.UNBOUNDED);
+     } else if (id.equals(VCFConstants.PHASE_QUALITY_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Float) && line.getCount() == 1;
+     }
+
+     // INFO lines
+     if (id.equals(VCFConstants.END_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.DBSNP_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Flag) && line.getCount() == 0;
+     } else if (id.equals(VCFConstants.STRAND_BIAS_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Float) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.ALLELE_FREQUENCY_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Float) && line.getCountType().equals(VCFHeaderLineCount.A);
+     } else if (id.equals(VCFConstants.ALLELE_COUNT_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.A);
+     } else if (id.equals(VCFConstants.ALLELE_NUMBER_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.MAPPING_QUALITY_ZERO_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.RMS_MAPPING_QUALITY_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Float) && line.getCount() == 1;
+     } else if (id.equals(VCFConstants.SOMATIC_KEY)) {
+         return line.getType().equals(VCFHeaderLineType.Flag) && line.getCount() == 0;
+     }
+
+     throw new IllegalArgumentException("Unexpected id : " + id);
}
private class RepairHeaderTest {
@@ -137,7 +186,7 @@ public String toString() {
}
@Test(dataProvider = "RepairHeaderTest")
- public void testRepairHeaderTest(RepairHeaderTest cfg) {
+ public void testRepairHeaderTest(final RepairHeaderTest cfg) {
final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original));
final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair);
@@ -148,7 +197,8 @@ public void testRepairHeaderTest(RepairHeaderTest cfg) {
Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID());
Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType());
Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType());
- if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER )
+ if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER ) {
Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount());
+ }
}
}

0 comments on commit 9ceb00f

Please sign in to comment.