Permalink
Please sign in to comment.
Browse files
Starting
New filter to keep pairs of interval together Responding to some review comments Java 8-ified test Updating tests and decided on what to do about non primary alignments Responding to review comments round 2 Removing an unused import Forgot to delete out a commented block of code. Ah Responding to Yossi's review comments
- Loading branch information...
Showing
with
235 additions
and 1 deletion.
| @@ -0,0 +1,108 @@ | ||
| +/* | ||
| + * The MIT License | ||
| + * | ||
| + * Copyright (c) 2016 The Broad Institute | ||
| + * | ||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| + * of this software and associated documentation files (the "Software"), to deal | ||
| + * in the Software without restriction, including without limitation the rights | ||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| + * copies of the Software, and to permit persons to whom the Software is | ||
| + * furnished to do so, subject to the following conditions: | ||
| + * | ||
| + * The above copyright notice and this permission notice shall be included in | ||
| + * all copies or substantial portions of the Software. | ||
| + * | ||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| + * THE SOFTWARE. | ||
| + */ | ||
| +package htsjdk.samtools.filter; | ||
| + | ||
| +import htsjdk.samtools.SAMRecord; | ||
| +import htsjdk.samtools.SAMUtils; | ||
| +import htsjdk.samtools.util.Interval; | ||
| +import htsjdk.samtools.util.OverlapDetector; | ||
| + | ||
| +import java.util.Collection; | ||
| +import java.util.List; | ||
| + | ||
| +/** | ||
| + * Filter out SAMRecords where neither record of a pair overlaps a given set of | ||
| + * intervals. If one record of a pair overlaps the interval list, than both are | ||
| + * kept. It is required that the SAMRecords are passed in coordinate order, have | ||
| + * non-null SAMFileHeaders, and that Mate Cigar (MC) is present. | ||
| + * | ||
| + * @author kbergin@broadinstitute.org | ||
| + */ | ||
| +public class IntervalKeepPairFilter implements SamRecordFilter { | ||
| + private final OverlapDetector<Interval> intervalOverlapDetector; | ||
| + | ||
| + /** | ||
| + * Prepare to filter out SAMRecords that do not overlap the given list of | ||
| + * intervals | ||
| + * @param intervals | ||
| + */ | ||
| + public IntervalKeepPairFilter(final List<Interval> intervals) { | ||
| + this.intervalOverlapDetector = new OverlapDetector<>(0, 0); | ||
| + this.intervalOverlapDetector.addAll(intervals, intervals); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Determines whether a SAMRecord matches this filter. Takes record, finds | ||
| + * the location of its mate using the MC tag. Checks if either record | ||
| + * overlaps the current interval using overlap detector. If yes, return | ||
| + * false -> don't filter it out. | ||
| + * | ||
| + * If a read is secondary or supplementary, filter read out. Use | ||
| + * {@link IntervalFilter} if you want to keep these reads, but NOTE: the | ||
| + * resulting bam may not be valid. | ||
| + * | ||
| + * @param record the SAMRecord to evaluate | ||
| + * @return true if the SAMRecord matches the filter, otherwise false | ||
| + */ | ||
| + public boolean filterOut(final SAMRecord record) { | ||
| + if (record.isSecondaryOrSupplementary()) { | ||
| + return true; | ||
| + } | ||
| + | ||
| + if (!record.getReadUnmappedFlag() | ||
| + && hasOverlaps(record.getReferenceName(), record.getStart(), record.getEnd())) { | ||
| + return false; | ||
| + } | ||
| + | ||
| + return record.getMateUnmappedFlag() || !hasOverlaps(record.getMateReferenceName(), | ||
| + record.getMateAlignmentStart(), SAMUtils.getMateAlignmentEnd(record)); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns true if the record overlaps any intervals in list, false otherwise. | ||
| + * | ||
| + * @param refSequence Reference contig name where record maps | ||
| + * @param start Record alignment start | ||
| + * @param end Record alignment end | ||
| + * @return true if SAMRecord overlaps any intervals in list | ||
| + */ | ||
| + private boolean hasOverlaps(final String refSequence, final int start, final int end) { | ||
| + final Interval readInterval = new Interval(refSequence, start, end); | ||
| + final Collection<Interval> overlapsRead = intervalOverlapDetector.getOverlaps(readInterval); | ||
| + | ||
| + return !overlapsRead.isEmpty(); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Determines whether a pair of SAMRecord matches this filter | ||
| + * | ||
| + * @param first the first SAMRecord to evaluate | ||
| + * @param second the second SAMRecord to evaluate | ||
| + * | ||
| + * @return true if both SAMRecords do not overlap the interval list | ||
| + */ | ||
| + public boolean filterOut(final SAMRecord first, final SAMRecord second) { | ||
| + return filterOut(first) && filterOut(second); | ||
| + } | ||
| +} |
| @@ -0,0 +1,123 @@ | ||
| +package htsjdk.samtools.filter; | ||
| + | ||
| +import htsjdk.samtools.SAMRecordSetBuilder; | ||
| +import htsjdk.samtools.util.CollectionUtil; | ||
| +import org.testng.Assert; | ||
| +import org.testng.annotations.BeforeTest; | ||
| +import org.testng.annotations.DataProvider; | ||
| +import org.testng.annotations.Test; | ||
| +import htsjdk.samtools.util.Interval; | ||
| +import java.util.List; | ||
| +import java.util.ArrayList; | ||
| +import java.util.stream.StreamSupport; | ||
| + | ||
| +public class IntervalKeepPairFilterTest { | ||
| + private static final int READ_LENGTH = 151; | ||
| + private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); | ||
| + | ||
| + @BeforeTest | ||
| + public void setUp() { | ||
| + builder.setReadLength(READ_LENGTH); | ||
| + // Will be kept when an interval overlaps chromosome 1 in the first 151 | ||
| + // bases. | ||
| + builder.addPair("mapped_pair_chr1", 0, 1, 151); | ||
| + // Will be kept when an interval overlaps chromsome 2 in the first 151 | ||
| + // bases. | ||
| + builder.addPair("mapped_pair_chr2", 1, 1, 151); | ||
| + // The first read should pass and second should not, but both will | ||
| + // be kept in first test. | ||
| + builder.addPair("one_of_pair", 0, 1, 1000); | ||
| + // The second read is unmapped, but both should be kept in an | ||
| + // interval test where the interval includes chromosome four, where | ||
| + // read one will overlap. | ||
| + builder.addPair("second_mate_unmapped", 3, -1, 1, 1000, false, true, | ||
| + "151M", null, false, false, false, false, -1); | ||
| + // The first read is unmapped but both should be kept in an | ||
| + // interval test where the interval includes chromosome four, where | ||
| + // read two will overlap. | ||
| + builder.addPair("first_mate_unmapped", -1, 3, 1000, 1, true, false, | ||
| + null, "151M", false, false, false, false, -1); | ||
| + // This pair will overlap any interval that includes chromosome 1:1000 | ||
| + builder.addPair("prove_one_of_pair", 0, 1000, 1000); | ||
| + // These reads are unmapped and will not map to any intervals, so they | ||
| + // are never kept. This is tested below. | ||
| + builder.addPair("both_unmapped", -1, -1, 1, 1, true, true, null, null, | ||
| + false, false, false, false, -1); | ||
| + // Secondary alignments are never kept by the interval filter. | ||
| + builder.addFrag("mapped_pair_chr1", 0, 1, false, false, "151M", null, -1, true, false); | ||
| + // Supplementary alignment are never kept by the interval filter. | ||
| + builder.addFrag("mapped_pair_chr1", 0, 1, false, false, "151M", null, -1, false, true); | ||
| + } | ||
| + | ||
| + @Test(dataProvider = "testData") | ||
| + public void testIntervalPairFilter(final List<Interval> intervals, final long expectedPassingRecords) { | ||
| + final IntervalKeepPairFilter filter = new IntervalKeepPairFilter(intervals); | ||
| + | ||
| + long actualPassingRecords = StreamSupport.stream(builder.spliterator(), false) | ||
| + .filter(rec -> !filter.filterOut(rec)) | ||
| + .count(); | ||
| + | ||
| + Assert.assertEquals(actualPassingRecords, expectedPassingRecords); | ||
| + } | ||
| + | ||
| + @Test | ||
| + public void testUnmappedPair() { | ||
| + final List<Interval> intervalList = new ArrayList<>(); | ||
| + | ||
| + final Interval interval1 = new Interval("chr1", 1, 999); | ||
| + final Interval interval2 = new Interval("chr3", 1, 2); | ||
| + final Interval interval3 = new Interval("chr2", 1, 2); | ||
| + final Interval interval4 = new Interval("chr4", 1, 2); | ||
| + | ||
| + intervalList.addAll(CollectionUtil.makeList(interval1, interval2, interval3, interval4)); | ||
| + | ||
| + final IntervalKeepPairFilter filter = new IntervalKeepPairFilter(intervalList); | ||
| + | ||
| + boolean unmappedPassed = StreamSupport.stream(builder.spliterator(), false) | ||
| + .filter(rec -> !filter.filterOut(rec)) | ||
| + .anyMatch(rec -> rec.getReadName().equals("both_unmapped")); | ||
| + | ||
| + Assert.assertFalse(unmappedPassed); | ||
| + } | ||
| + | ||
| + @Test | ||
| + public void testNotPrimaryReads() { | ||
| + final List<Interval> intervalList = new ArrayList<>(); | ||
| + final Interval interval1 = new Interval("chr1", 1, 999); | ||
| + intervalList.add(interval1); | ||
| + | ||
| + final IntervalKeepPairFilter filter = new IntervalKeepPairFilter(intervalList); | ||
| + | ||
| + boolean notPrimary = StreamSupport.stream(builder.spliterator(), false) | ||
| + .filter(rec -> !filter.filterOut(rec)) | ||
| + .anyMatch(rec -> rec.getNotPrimaryAlignmentFlag() || rec.getSupplementaryAlignmentFlag()); | ||
| + | ||
| + Assert.assertFalse(notPrimary); | ||
| + } | ||
| + | ||
| + @DataProvider(name = "testData") | ||
| + private Object[][] testData() { | ||
| + Interval interval = new Interval("chr1", 1, 999); | ||
| + final List<Interval> intervalList_twoPair = new ArrayList<>(); | ||
| + intervalList_twoPair.add(interval); | ||
| + | ||
| + interval = new Interval("chr3", 1, 2); | ||
| + final List<Interval> intervalList_noMatch = new ArrayList<>(); | ||
| + intervalList_noMatch.add(interval); | ||
| + | ||
| + interval = new Interval("chr2", 1, 2); | ||
| + final List<Interval> intervalList_onePair = new ArrayList<>(); | ||
| + intervalList_onePair.add(interval); | ||
| + | ||
| + interval = new Interval("chr4", 1, 2); | ||
| + final List<Interval> intervalList_unmapped = new ArrayList<>(); | ||
| + intervalList_unmapped.add(interval); | ||
| + | ||
| + return new Object[][]{ | ||
| + {intervalList_twoPair, 4}, | ||
| + {intervalList_noMatch, 0}, | ||
| + {intervalList_onePair, 2}, | ||
| + {intervalList_unmapped, 4} | ||
| + }; | ||
| + } | ||
| +} |
0 comments on commit
702e77f