diff --git a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java index dd5551efb..de2817c96 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java +++ b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java @@ -37,17 +37,19 @@ import java.util.Set; /** - * Manages header lines for standard VCF INFO and FORMAT fields. + * Manages header lines for standard VCF
INFO
and
FORMAT
fields. * - * Provides simple mechanisms for registering standard lines, - * looking them up, and adding them to headers. + * Provides simple mechanisms for + * 1) registering standard lines, + * 2) looking them up, and + * 3) adding them to headers. * * @author Mark DePristo * @since 6/12 */ public class VCFStandardHeaderLines { /** - * Enabling this causes us to repair header lines even if only their descriptions differ + * Enabling this causes us to repair header lines even if only their descriptions differ. */ private final static boolean REPAIR_BAD_DESCRIPTIONS = false; private static Standards formatStandards = new Standards(); @@ -55,10 +57,7 @@ /** * Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly - * allocated VCFHeader with standard VCF header lines repaired as necessary - * - * @param header - * @return + * allocated {@link VCFHeader} with standard VCF header lines repaired as necessary. */ public static VCFHeader repairStandardHeaderLines(final VCFHeader header) { final Set newLines = new LinkedHashSet(header.getMetaDataInInputOrder().size()); @@ -77,11 +76,8 @@ public static VCFHeader repairStandardHeaderLines(final VCFHeader header) { /** * Adds header lines for each of the format fields in IDs to header, returning the set of - * IDs without standard descriptions, unless throwErrorForMissing is true, in which - * case this situation results in a TribbleException - * - * @param IDs - * @return + * {@code IDs} without standard descriptions, unless {@code throwErrorForMissing} is true, in which + * case this situation results in a {@link TribbleException} */ public static Set addStandardFormatLines(final Set headerLines, final boolean throwErrorForMissing, final Collection IDs) { return formatStandards.addToHeader(headerLines, IDs, throwErrorForMissing); @@ -89,49 +85,31 @@ public static VCFHeader repairStandardHeaderLines(final VCFHeader header) { /** * @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection) - * - * @param headerLines - * @param throwErrorForMissing - * @param IDs - * @return */ public static Set addStandardFormatLines(final Set headerLines, final boolean throwErrorForMissing, final String ... IDs) { return addStandardFormatLines(headerLines, throwErrorForMissing, Arrays.asList(IDs)); } /** - * Returns the standard format line for ID. If none exists, return null or throw an exception, depending - * on throwErrorForMissing - * - * @param ID - * @param throwErrorForMissing - * @return + * Returns the standard format line for {@code ID}. + * If none exists, return null or throw an exception, depending on {@code throwErrorForMissing}. */ public static VCFFormatHeaderLine getFormatLine(final String ID, final boolean throwErrorForMissing) { return formatStandards.get(ID, throwErrorForMissing); } /** - * Returns the standard format line for ID. If none exists throw an exception - * - * @param ID - * @return + * Returns the standard format line for {@code ID}. + * If none exists, throw an {@link TribbleException} */ public static VCFFormatHeaderLine getFormatLine(final String ID) { return formatStandards.get(ID, true); } - private static void registerStandard(final VCFFormatHeaderLine line) { - formatStandards.add(line); - } - /** - * Adds header lines for each of the info fields in IDs to header, returning the set of - * IDs without standard descriptions, unless throwErrorForMissing is true, in which - * case this situation results in a TribbleException - * - * @param IDs - * @return + * Adds header lines for each of the info fields in {@code IDs} to header, returning the set of + * IDs without standard descriptions, unless {@code throwErrorForMissing} is true, in which + * case this situation results in a {@link TribbleException}. */ public static Set addStandardInfoLines(final Set headerLines, final boolean throwErrorForMissing, final Collection IDs) { return infoStandards.addToHeader(headerLines, IDs, throwErrorForMissing); @@ -139,65 +117,60 @@ private static void registerStandard(final VCFFormatHeaderLine line) { /** * @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection) - * - * @param IDs - * @return */ public static Set addStandardInfoLines(final Set headerLines, final boolean throwErrorForMissing, final String ... IDs) { return addStandardInfoLines(headerLines, throwErrorForMissing, Arrays.asList(IDs)); } /** - * Returns the standard info line for ID. If none exists, return null or throw an exception, depending - * on throwErrorForMissing - * - * @param ID - * @param throwErrorForMissing - * @return + * Returns the standard info line for {@code ID}. + * If none exists, return {@code null} or throw a {@link TribbleException}, depending on {@code throwErrorForMissing}. */ public static VCFInfoHeaderLine getInfoLine(final String ID, final boolean throwErrorForMissing) { return infoStandards.get(ID, throwErrorForMissing); } /** - * Returns the standard info line for ID. If none exists throw an exception - * - * @param ID - * @return + * Returns the standard info line for {@code ID}. + * If none exists throw a {@link TribbleException}. */ public static VCFInfoHeaderLine getInfoLine(final String ID) { return getInfoLine(ID, true); } + private static void registerStandard(final VCFInfoHeaderLine line) { infoStandards.add(line); } + private static void registerStandard(final VCFFormatHeaderLine line) { + formatStandards.add(line); + } // // VCF header line constants // static { // FORMAT lines - registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter")); - registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter")); + registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality")); // INFO lines - registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); - registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); + registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event")); } private static class Standards { @@ -207,10 +180,10 @@ public T repair(final T line) { final T standard = get(line.getID(), false); if ( standard != null ) { final boolean badCountType = line.getCountType() != standard.getCountType(); - final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount(); - final boolean badType = line.getType() != standard.getType(); - final boolean badDesc = ! line.getDescription().equals(standard.getDescription()); - final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc); + final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount(); + final boolean badType = line.getType() != standard.getType(); + final boolean badDesc = ! line.getDescription().equals(standard.getDescription()); + final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc); if ( needsRepair ) { if ( GeneralUtils.DEBUG_MODE_ENABLED ) { @@ -221,10 +194,12 @@ public T repair(final T line) { + (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": "")); } return standard; - } else + } else { return line; - } else + } + } else { return line; + } } public Set addToHeader(final Set headerLines, final Collection IDs, final boolean throwErrorForMissing) { @@ -241,15 +216,17 @@ public T repair(final T line) { } public void add(final T line) { - if ( standards.containsKey(line.getID()) ) + if ( standards.containsKey(line.getID()) ) { throw new TribbleException("Attempting to add multiple standard header lines for ID " + line.getID()); + } standards.put(line.getID(), line); } public T get(final String ID, final boolean throwErrorForMissing) { final T x = standards.get(ID); - if ( throwErrorForMissing && x == null ) + if ( throwErrorForMissing && x == null ) { throw new TribbleException("Couldn't find a standard VCF header line for field " + ID); + } return x; } } diff --git a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java index 5a8ce6e62..f72cd8797 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java @@ -53,6 +53,10 @@ tests.add(new Object[]{"DP", "info", true}); tests.add(new Object[]{"DB", "info", true}); tests.add(new Object[]{"END", "info", true}); + tests.add(new Object[]{"SB", "info", true}); + tests.add(new Object[]{"MQ", "info", true}); + tests.add(new Object[]{"MQ0", "info", true}); + tests.add(new Object[]{"SOMATIC", "info", true}); // format tests.add(new Object[]{"GT", "format", true}); @@ -60,6 +64,8 @@ tests.add(new Object[]{"DP", "format", true}); tests.add(new Object[]{"AD", "format", true}); tests.add(new Object[]{"PL", "format", true}); + tests.add(new Object[]{"FT", "format", true}); + tests.add(new Object[]{"PQ", "format", true}); tests.add(new Object[]{"NOT_STANDARD", "info", false}); tests.add(new Object[]{"NOT_STANDARD", "format", false}); @@ -81,8 +87,51 @@ else if ( type.equals("format") ) if ( expectedToBeStandard ) { Assert.assertNotNull(line); Assert.assertEquals(line.getID(), key); - } else + Assert.assertTrue(deeperTest(line)); + } else { Assert.assertNull(line); + } + } + + private boolean deeperTest(final VCFCompoundHeaderLine line){ + + final String id = line.getID(); + if(id.equals(VCFConstants.GENOTYPE_KEY)) + return line.getType().equals(VCFHeaderLineType.String) && line.getCount()==1 ; + else if(id.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.DEPTH_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.GENOTYPE_PL_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.G); + else if(id.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.R); + else if(id.equals(VCFConstants.GENOTYPE_FILTER_KEY)) + return line.getType().equals(VCFHeaderLineType.String) && line.getCountType().equals(VCFHeaderLineCount.UNBOUNDED); + else if(id.equals(VCFConstants.PHASE_QUALITY_KEY)) + return line.getType().equals(VCFHeaderLineType.Float) && line.getCount()==1; + else if(id.equals(VCFConstants.END_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.DBSNP_KEY)) + return line.getType().equals(VCFHeaderLineType.Flag) && line.getCount()==0; + else if(id.equals(VCFConstants.DEPTH_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.STRAND_BIAS_KEY)) + return line.getType().equals(VCFHeaderLineType.Float) && line.getCount()==1; + else if(id.equals(VCFConstants.ALLELE_FREQUENCY_KEY)) + return line.getType().equals(VCFHeaderLineType.Float) && line.getCountType().equals(VCFHeaderLineCount.A); + else if(id.equals(VCFConstants.ALLELE_COUNT_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.A); + else if(id.equals(VCFConstants.ALLELE_NUMBER_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.MAPPING_QUALITY_ZERO_KEY)) + return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; + else if(id.equals(VCFConstants.RMS_MAPPING_QUALITY_KEY)) + return line.getType().equals(VCFHeaderLineType.Float) && line.getCount()==1; + else if(id.equals(VCFConstants.SOMATIC_KEY)) + return line.getType().equals(VCFHeaderLineType.Flag) && line.getCount()==0; + else + throw new IllegalArgumentException("Unexpected id : " + id); } private class RepairHeaderTest { @@ -137,7 +186,7 @@ public String toString() { } @Test(dataProvider = "RepairHeaderTest") - public void testRepairHeaderTest(RepairHeaderTest cfg) { + public void testRepairHeaderTest(final RepairHeaderTest cfg) { final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original)); final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair); @@ -148,7 +197,8 @@ public void testRepairHeaderTest(RepairHeaderTest cfg) { Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID()); Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType()); Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType()); - if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER ) + if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER ) { Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount()); + } } }