Adding an overlaps function to IntervalList. #877

Merged
merged 2 commits into from Jun 7, 2017
Jump to file or symbol
Failed to load files and symbols.
+145 −5
Split
@@ -739,6 +739,64 @@ public static IntervalList difference(final Collection<IntervalList> lists1, fin
subtract(lists2, lists1));
}
+ /**
+ * A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval
+ * in the second list.
+ *
+ * Delegates to the collection-based overload, wrapping each argument in a singleton list.
+ *
+ * @param lhs the first IntervalList, whose intervals are candidates for the result
+ * @param rhs the second IntervalList, whose intervals the candidates are tested against
+ * @return an IntervalList comprising all intervals in the first IntervalList that have at least 1bp overlap with
+ * any interval in the second.
+ */
+ public static IntervalList overlaps(final IntervalList lhs, final IntervalList rhs) {
+ return overlaps(Collections.singletonList(lhs), Collections.singletonList(rhs));
+ }
+
+ /**
+ * A utility function for finding the intervals in the first collection of lists that have at least 1bp overlap
+ * with any interval in the second collection of lists.
+ *
+ * The returned list is built on a clone of the header of the first IntervalList in lists1, with its sort order set
+ * to unsorted. Matching intervals are added in the order in which they are encountered while iterating lists1.
+ * The sequence dictionary of every list in both collections is checked against that header's dictionary via
+ * SequenceUtil.assertSequenceDictionariesEqual.
+ *
+ * @param lists1 the first collection of IntervalLists; must not be empty
+ * @param lists2 the second collection of IntervalLists; may be empty
+ * @return an IntervalList comprising all intervals in the first collection of lists that have at least 1bp
+ * overlap with any interval in the second collection of lists.
+ * @throws SAMException if lists1 is empty
+ */
+ public static IntervalList overlaps(final Collection<IntervalList> lists1, final Collection<IntervalList> lists2) {
+ if(lists1.isEmpty()){
+ throw new SAMException("Cannot call overlaps with the first collection having empty list of IntervalLists.");
+ }
+
+ final SAMFileHeader header = lists1.iterator().next().getHeader().clone();
+ header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
+
+ // Create an overlap detector on the intervals pooled from every list in lists2
+ final IntervalList overlapIntervals = new IntervalList(header);
+ for (final IntervalList list : lists2) {
+ SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
+ list.getHeader().getSequenceDictionary());
+ overlapIntervals.addall(list.getIntervals());
+ }
+ final OverlapDetector<Integer> detector = new OverlapDetector<>(0, 0);
+ final int dummy = -1; // NB: since we don't actually use the returned objects, we can use a dummy value
+ for (final Interval interval : overlapIntervals.sorted().uniqued()) {
+ detector.addLhs(dummy, interval);
+ }
+
+ // Go through each input interval in lists1 and see if it overlaps any interval in lists2
+ final IntervalList merged = new IntervalList(header);
+ for (final IntervalList list : lists1) {
+ SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
+ list.getHeader().getSequenceDictionary());
+ for (final Interval interval : list.getIntervals()) {
+ if (detector.overlapsAny(interval)) {
+ merged.add(interval);
+ }
+ }
+ }
+
+ return merged;
+ }
+
@Override
public boolean equals(final Object o) {
if (this == o) return true;
@@ -25,10 +25,7 @@
package htsjdk.samtools.util;
import htsjdk.HtsjdkTest;
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
-import htsjdk.samtools.SamFileHeaderMerger;
+import htsjdk.samtools.*;
import htsjdk.variant.vcf.VCFFileReader;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -377,12 +374,97 @@ public void testSubtractSingletonIntervalLists(final IntervalList fromLists, fin
}
@Test(dataProvider = "subtractSingletonData")
- public void testSubtractSingletonasListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+ public void testSubtractSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
Assert.assertEquals(
CollectionUtil.makeCollection(IntervalList.subtract(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()),
CollectionUtil.makeCollection(list.iterator()));
}
+ @DataProvider(name = "overlapsSingletonData") // each row: {first list, second list, expected result of overlaps(first, second)}
+ public Object[][] overlapSingletonData() {
+ final IntervalList two_overlaps_one = new IntervalList(fileHeader);
+ final IntervalList three_overlaps_two = new IntervalList(fileHeader);
+ final IntervalList three_overlaps_one = new IntervalList(fileHeader);
+ final IntervalList one_overlaps_three = new IntervalList(fileHeader);
+
+ // NB: the commented-out lines below show the intervals of the first list that are expected to be absent from the resulting list
+
+ two_overlaps_one.add(new Interval("1", 50, 150));
+ //two_overlaps_one.add(new Interval("1", 301, 500));
+ two_overlaps_one.add(new Interval("2", 1, 150));
+ two_overlaps_one.add(new Interval("2", 250, 270));
+ two_overlaps_one.add(new Interval("2", 290, 400));
+
+ three_overlaps_two.add(new Interval("1", 25, 400));
+ three_overlaps_two.add(new Interval("2", 200, 600));
+ //three_overlaps_two.add(new Interval("3", 50, 470));
+
+ three_overlaps_one.add(new Interval("1", 25, 400));
+ three_overlaps_one.add(new Interval("2", 200, 600));
+ //three_overlaps_one.add(new Interval("3", 50, 470));
+
+ one_overlaps_three.add(new Interval("1", 1, 100));
+ one_overlaps_three.add(new Interval("1", 101, 200));
+ one_overlaps_three.add(new Interval("1", 202, 300));
+ one_overlaps_three.add(new Interval("2", 200, 300));
+ //one_overlaps_three.add(new Interval("2", 100, 150));
+
+ return new Object[][]{
+ new Object[]{list1, list1, list1}, // should return itself
+ new Object[]{list1, IntervalList.invert(list1), new IntervalList(list1.getHeader())}, // should be empty
+ new Object[]{list2, list1, two_overlaps_one},
+ new Object[]{list3, list2, three_overlaps_two},
+ new Object[]{list3, list1, three_overlaps_one},
+ new Object[]{list1, list3, one_overlaps_three}
+ };
+ }
+
+ @DataProvider(name = "overlapsData")
@yfarjoun

yfarjoun May 26, 2017

Contributor

please add a test with an empty collection on the lhs and another on the rhs.

@nh13

nh13 Jun 3, 2017

Contributor

Done.

+ public Object[][] overlapData() { // each row: {first collection of lists, second collection of lists, expected result}
+ final IntervalList three_overlaps_one_and_two = new IntervalList(fileHeader);
+
+ three_overlaps_one_and_two.add(new Interval("1", 25, 400));
+ three_overlaps_one_and_two.add(new Interval("2", 200, 600));
+ //three_overlaps_one_and_two.add(new Interval("3", 50, 470)); // commented out: not expected in the result
+
+ return new Object[][]{
+ new Object[]{CollectionUtil.makeList(list3), CollectionUtil.makeList(list1, list2), three_overlaps_one_and_two},
+ };
+ }
+
+ @Test(dataProvider = "overlapsData") // exercises the collection-based overload with multi-list inputs
+ public void testOverlapsIntervalLists(final List<IntervalList> fromLists, final List<IntervalList> whatLists, final IntervalList list) {
+ Assert.assertEquals( // compares the intervals produced by iterating the result against those of the expected list
+ CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()),
+ CollectionUtil.makeCollection(list.iterator()));
+ }
+
+ @Test(dataProvider = "overlapsSingletonData") // exercises the two-IntervalList convenience overload
+ public void testOverlapsSingletonIntervalLists(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+ Assert.assertEquals( // compares the intervals produced by iterating the result against those of the expected list
+ CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()),
+ CollectionUtil.makeCollection(list.iterator()));
+ }
+
+ @Test(dataProvider = "overlapsSingletonData") // same data as the singleton test, but passed through the collection-based overload
+ public void testOverlapsSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) {
+ Assert.assertEquals( // each input list is wrapped in a singleton list before calling overlaps
+ CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()),
+ CollectionUtil.makeCollection(list.iterator()));
+ }
+
+ @Test(expectedExceptions = SAMException.class) // an empty first collection is expected to be rejected with a SAMException
+ public void testOverlapsEmptyFirstList() {
+ IntervalList.overlaps(Collections.emptyList(), Collections.singletonList(list1));
+ }
+
+ @Test // an empty second collection is allowed; the result is expected to contain no intervals
+ public void testOverlapsEmptySecondList() {
+ Assert.assertEquals(
+ CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(list1), Collections.emptyList()).iterator()),
+ Collections.emptyList());
+ }
+
@DataProvider(name = "VCFCompData")
public Object[][] VCFCompData() {
return new Object[][]{