-
Notifications
You must be signed in to change notification settings - Fork 19
/
ranges-overlap-filter.R
105 lines (94 loc) · 3.69 KB
/
ranges-overlap-filter.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#' Filter by overlapping/non-overlapping ranges
#'
#' @param x,y Objects representing ranges
#' @param maxgap The maximum gap between intervals as a single
#' integer greater than or equal to -1. If you modify this argument,
#' `minoverlap` must be held fixed.
#' @param minoverlap The minimum amount of overlap between intervals
#' as a single integer greater than or equal to 0. If you modify this
#' argument, `maxgap` must be held fixed.
#'
#' @details By default, `filter_by_overlaps` and
#' `filter_by_non_overlaps` ignore strandedness for [GRanges()]
#' objects. To perform stranded operations use
#' `filter_by_overlaps_directed` and `filter_by_non_overlaps_directed`.
#' The argument `maxgap` is the maximum number of positions
#' between two ranges for them to be considered overlapping. Here the default
#' is set to be -1 as that is the gap between two ranges where one
#' has its start or end strictly inside the other. The argument
#' `minoverlap` refers to the minimum number of positions
#' overlapping between ranges, to consider there to be overlap.
#'
#' @return a Ranges object
#' @importFrom IRanges subsetByOverlaps
#' @seealso \code{IRanges::\link[IRanges:findOverlaps-methods]{subsetByOverlaps()}}
#' @export
#' @examples
#' df <- data.frame(seqnames = c("chr1", rep("chr2", 2),
#' rep("chr3", 3), rep("chr4", 4)),
#' start = 1:10,
#' width = 10:1,
#' strand = c("-", "+", "+", "*", "*", "+", "+", "+", "-", "-"),
#' name = letters[1:10])
#' query <- as_granges(df)
#'
#' df2 <- data.frame(seqnames = c(rep("chr2", 2), rep("chr1", 3), "chr2"),
#' start = c(4,3,7,13,1,4),
#' width = c(6,6,3,3,3,9),
#' strand = c(rep("+", 3), rep("-", 3)))
#' subject <- as_granges(df2)
#'
#' filter_by_overlaps(query, subject)
#'
#' filter_by_overlaps_directed(query, subject)
#'
#' filter_by_non_overlaps(query, subject)
#'
#' filter_by_non_overlaps_directed(query, subject)
#'
#' @rdname ranges-filter-overlaps
filter_by_overlaps <- function(x, y, maxgap = -1L, minoverlap = 0L) {
  # S3 generic: the method is selected from the class of `x`; all
  # arguments are forwarded to that method unchanged.
  UseMethod("filter_by_overlaps", x)
}
#' @export
filter_by_overlaps.IntegerRanges <- function(x, y, maxgap = -1L,
                                             minoverlap = 0L) {
  # Keep the ranges of `x` that overlap `y`; the gap/overlap thresholds
  # are handed to subsetByOverlaps() by name.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap
  )
}
#' @export
filter_by_overlaps.GenomicRanges <- function(x, y, maxgap = -1L,
                                             minoverlap = 0L) {
  # Undirected variant: strand is ignored when deciding which ranges of
  # `x` overlap `y`.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap,
    ignore.strand = TRUE
  )
}
#' @export
#' @rdname ranges-filter-overlaps
filter_by_non_overlaps <- function(x, y, maxgap = -1L, minoverlap = 0L) {
  # S3 generic: the method is selected from the class of `x`.
  # The defaults mirror those declared by the methods and by
  # filter_by_overlaps(), so the generic's signature (and the usage
  # generated from it) no longer presents `maxgap` and `minoverlap` as
  # required arguments.
  UseMethod("filter_by_non_overlaps")
}
#' @export
filter_by_non_overlaps.IntegerRanges <- function(x, y, maxgap = -1L,
                                                 minoverlap = 0L) {
  # `invert = TRUE` flips the selection: the ranges of `x` that do NOT
  # overlap `y` are returned.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap,
    invert = TRUE
  )
}
#' @export
filter_by_non_overlaps.GenomicRanges <- function(x, y, maxgap = -1L,
                                                 minoverlap = 0L) {
  # Undirected variant: strand is ignored, and `invert = TRUE` returns
  # the ranges of `x` that do NOT overlap `y`.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap,
    ignore.strand = TRUE,
    invert = TRUE
  )
}
#' @export
#' @rdname ranges-filter-overlaps
filter_by_overlaps_directed <- function(x, y, maxgap = -1L, minoverlap = 0L) {
  # S3 generic for the strand-aware filter: the method is selected from
  # the class of `x`.
  UseMethod("filter_by_overlaps_directed", x)
}
#' @export
filter_by_overlaps_directed.GenomicRanges <- function(x, y, maxgap = -1L,
                                                      minoverlap = 0L) {
  # Directed variant: strand is taken into account
  # (`ignore.strand = FALSE`) when selecting the ranges of `x` that
  # overlap `y`.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap,
    ignore.strand = FALSE
  )
}
#' @export
#' @rdname ranges-filter-overlaps
filter_by_non_overlaps_directed <- function(x, y, maxgap = -1L,
                                            minoverlap = 0L) {
  # S3 generic for the strand-aware inverse filter: the method is
  # selected from the class of `x`.
  # The defaults mirror those declared by the GenomicRanges method and
  # by filter_by_overlaps_directed(), so the generic's signature (and
  # the usage generated from it) no longer presents `maxgap` and
  # `minoverlap` as required arguments.
  UseMethod("filter_by_non_overlaps_directed")
}
#' @export
filter_by_non_overlaps_directed.GenomicRanges <- function(x, y, maxgap = -1L,
                                                          minoverlap = 0L) {
  # Directed variant: strand is taken into account
  # (`ignore.strand = FALSE`), and `invert = TRUE` returns the ranges of
  # `x` that do NOT overlap `y`.
  subsetByOverlaps(
    x, y,
    maxgap = maxgap,
    minoverlap = minoverlap,
    ignore.strand = FALSE,
    invert = TRUE
  )
}
# TODO -- add in more variants here?