diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java
index 929671a84..26403c512 100644
--- a/src/main/java/htsjdk/samtools/util/IntervalList.java
+++ b/src/main/java/htsjdk/samtools/util/IntervalList.java
@@ -739,6 +739,64 @@ public static IntervalList difference(final Collection<IntervalList> lists1, fin
                 subtract(lists2, lists1));
     }
 
+    /**
+     * A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval
+     * in the second list.
+     *
+     * @param lhs the first IntervalList
+     * @param rhs the second IntervalList
+     * @return an IntervalList comprising all intervals in the first IntervalList that have at least 1bp overlap with
+     * any interval in the second.
+     */
+    public static IntervalList overlaps(final IntervalList lhs, final IntervalList rhs) {
+        return overlaps(Collections.singletonList(lhs), Collections.singletonList(rhs));
+    }
+
+    /**
+     * Finds the intervals in the first collection of lists that have at least 1bp overlap with any interval in the
+     * second collection of lists; the result uses a clone of the first list's header marked as unsorted.
+     *
+     * @param lists1 the first collection of IntervalLists
+     * @param lists2 the second collection of IntervalLists
+     * @return an IntervalList of all intervals in lists1 that have at least 1bp overlap with any interval in lists2
+     * @throws SAMException if lists1 is empty
+     */
+    public static IntervalList overlaps(final Collection<IntervalList> lists1, final Collection<IntervalList> lists2) {
+        if (lists1.isEmpty()) {
+            throw new SAMException("Cannot call overlaps with the first collection having empty list of IntervalLists.");
+        }
+
+        final SAMFileHeader header = lists1.iterator().next().getHeader().clone();
+        header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
+
+        // Create an overlap detector on lists2
+        final IntervalList overlapIntervals = new IntervalList(header);
+        for (final IntervalList list : lists2) {
+            SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
+                    list.getHeader().getSequenceDictionary());
+            overlapIntervals.addall(list.getIntervals());
+        }
+        final OverlapDetector<Integer> detector = new OverlapDetector<>(0, 0);
+        final int dummy = -1; // NB: since we don't actually use the returned objects, we can use a dummy value
+        for (final Interval interval : overlapIntervals.sorted().uniqued()) {
+            detector.addLhs(dummy, interval);
+        }
+
+        // Go through each input interval in lists1 and see if it overlaps any interval in lists2
+        final IntervalList merged = new IntervalList(header);
+        for (final IntervalList list : lists1) {
+            SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
+                    list.getHeader().getSequenceDictionary());
+            for (final Interval interval : list.getIntervals()) {
+                if (detector.overlapsAny(interval)) {
+                    merged.add(interval);
+                }
+            }
+        }
+
+        return merged;
+    }
+
     @Override
     public boolean equals(final Object o) {
         if (this == o) return true;
diff --git a/src/test/java/htsjdk/samtools/util/IntervalListTest.java b/src/test/java/htsjdk/samtools/util/IntervalListTest.java
index 983820bbe..e138ee0e1 100644
--- a/src/test/java/htsjdk/samtools/util/IntervalListTest.java
+++ b/src/test/java/htsjdk/samtools/util/IntervalListTest.java
@@ -25,10 +25,11 @@
 package htsjdk.samtools.util;
 
 import htsjdk.HtsjdkTest;
+import htsjdk.samtools.SAMException;
 import htsjdk.samtools.SAMFileHeader;
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.SAMSequenceRecord;
 import htsjdk.samtools.SamFileHeaderMerger;
 import htsjdk.variant.vcf.VCFFileReader;
 import org.testng.Assert;
 import org.testng.annotations.BeforeTest;
@@ -377,12 +378,97 @@ public void testSubtractSingletonIntervalLists(final IntervalList fromLists, fin
     }
 
     @Test(dataProvider = "subtractSingletonData")
-    public void testSubtractSingletonasListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+    public void testSubtractSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
         Assert.assertEquals(
                 CollectionUtil.makeCollection(IntervalList.subtract(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()),
                 CollectionUtil.makeCollection(list.iterator()));
     }
 
+    @DataProvider(name = "overlapsSingletonData")
+    public Object[][] overlapSingletonData() {
+        final IntervalList two_overlaps_one = new IntervalList(fileHeader);
+        final IntervalList three_overlaps_two = new IntervalList(fileHeader);
+        final IntervalList three_overlaps_one = new IntervalList(fileHeader);
+        final IntervalList one_overlaps_three = new IntervalList(fileHeader);
+
+        // NB: commented lines below are there to show the intervals in the first list that will not be in the resulting list
+
+        two_overlaps_one.add(new Interval("1", 50, 150));
+        //two_overlaps_one.add(new Interval("1", 301, 500));
+        two_overlaps_one.add(new Interval("2", 1, 150));
+        two_overlaps_one.add(new Interval("2", 250, 270));
+        two_overlaps_one.add(new Interval("2", 290, 400));
+
+        three_overlaps_two.add(new Interval("1", 25, 400));
+        three_overlaps_two.add(new Interval("2", 200, 600));
+        //three_overlaps_two.add(new Interval("3", 50, 470));
+
+        three_overlaps_one.add(new Interval("1", 25, 400));
+        three_overlaps_one.add(new Interval("2", 200, 600));
+        //three_overlaps_one.add(new Interval("3", 50, 470));
+
+        one_overlaps_three.add(new Interval("1", 1, 100));
+        one_overlaps_three.add(new Interval("1", 101, 200));
+        one_overlaps_three.add(new Interval("1", 202, 300));
+        one_overlaps_three.add(new Interval("2", 200, 300));
+        //one_overlaps_three.add(new Interval("2", 100, 150));
+
+        return new Object[][]{
+                new Object[]{list1, list1, list1}, // should return itself
+                new Object[]{list1, IntervalList.invert(list1), new IntervalList(list1.getHeader())}, // should be empty
+                new Object[]{list2, list1, two_overlaps_one},
+                new Object[]{list3, list2, three_overlaps_two},
+                new Object[]{list3, list1, three_overlaps_one},
+                new Object[]{list1, list3, one_overlaps_three}
+        };
+    }
+
+    @DataProvider(name = "overlapsData")
+    public Object[][] overlapData() {
+        final IntervalList three_overlaps_one_and_two = new IntervalList(fileHeader);
+
+        three_overlaps_one_and_two.add(new Interval("1", 25, 400));
+        three_overlaps_one_and_two.add(new Interval("2", 200, 600));
+        //three_overlaps_one_and_two.add(new Interval("3", 50, 470));
+
+        return new Object[][]{
+                new Object[]{CollectionUtil.makeList(list3), CollectionUtil.makeList(list1, list2), three_overlaps_one_and_two},
+        };
+    }
+
+    @Test(dataProvider = "overlapsData")
+    public void testOverlapsIntervalLists(final List<IntervalList> fromLists, final List<IntervalList> whatLists, final IntervalList list) {
+        Assert.assertEquals(
+                CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()),
+                CollectionUtil.makeCollection(list.iterator()));
+    }
+
+    @Test(dataProvider = "overlapsSingletonData")
+    public void testOverlapsSingletonIntervalLists(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+        Assert.assertEquals(
+                CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()),
+                CollectionUtil.makeCollection(list.iterator()));
+    }
+
+    @Test(dataProvider = "overlapsSingletonData")
+    public void testOverlapsSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+        Assert.assertEquals(
+                CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()),
+                CollectionUtil.makeCollection(list.iterator()));
+    }
+
+    @Test(expectedExceptions = SAMException.class)
+    public void testOverlapsEmptyFirstList() {
+        IntervalList.overlaps(Collections.emptyList(), Collections.singletonList(list1));
+    }
+
+    @Test
+    public void testOverlapsEmptySecondList() {
+        Assert.assertEquals(
+                CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(list1), Collections.emptyList()).iterator()),
+                Collections.emptyList());
+    }
+
     @DataProvider(name = "VCFCompData")
     public Object[][] VCFCompData() {
         return new Object[][]{