-
Notifications
You must be signed in to change notification settings - Fork 25
/
calculateOverlap.R
151 lines (142 loc) · 5.35 KB
/
calculateOverlap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#' Estimate overlap
#'
#' This function calculates overlap for all sample-pairs
#' in a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' object.
#'
#' @param x a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' object containing a tree.
#'
#' @param assay.type A single character value for selecting the
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}}
#' to calculate the overlap.
#'
#' @param assay_name a single \code{character} value for specifying which
#' assay to use for calculation.
#' (Please use \code{assay.type} instead. At some point \code{assay_name}
#' will be disabled.)
#'
#' @param detection A single numeric value for selecting detection threshold for
#' absence/presence of features. Feature that has abundance under threshold in
#' either of samples, will be discarded when evaluating overlap between samples.
#'
#' @param ... Optional arguments not used.
#'
#' @return calculateOverlap returns sample-by-sample distance matrix.
#' runOverlap returns \code{x} that includes overlap matrix in its
#' reducedDim.
#'
#' @details These function calculates overlap between all the sample-pairs. Overlap
#' reflects similarity between sample-pairs.
#'
#' When overlap is calculated using relative abundances, the higher the value the
#' higher the similarity is, When using relative abundances, overlap value 1 means that
#' all the abundances of features are equal between two samples, and 0 means that
#' samples have completely different relative abundances.
#'
#' @seealso
#' \code{\link[mia:calculateJSD]{calculateJSD}}
#' \code{\link[mia:calculateUnifrac]{calculateUnifrac}}
#'
#'
#' @name calculateOverlap
#' @export
#'
#' @author Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io}
#'
#' @examples
#' data(esophagus)
#' tse <- esophagus
#' tse <- transformAssay(tse, method = "relabundance")
#' overlap <- calculateOverlap(tse, assay_name = "relabundance")
#' overlap
#'
#' # Store result to reducedDim
#' tse <- runOverlap(tse, assay.type = "relabundance", name = "overlap_between_samples")
#' head(reducedDims(tse)$overlap_between_samples)
#'
NULL
#' @rdname calculateOverlap
#' @export
setGeneric("calculateOverlap", signature = c("x"),
function(x, assay.type = assay_name, assay_name = "counts",
detection = 0, ...)
standardGeneric("calculateOverlap"))
#' @rdname calculateOverlap
#' @export
setMethod("calculateOverlap", signature = c(x = "SummarizedExperiment"),
function(x, assay.type = assay_name, assay_name = "counts",
detection = 0, ...){
############################# INPUT CHECK ##############################
# Check assay.type
.check_assay_present(assay.type, x)
# Check detection
if (!.is_numeric_string(detection)) {
stop("'detection' must be a single numeric value or coercible to ",
"one.",
call. = FALSE)
}
detection <- as.numeric(detection)
########################### INPUT CHECK END ############################
# Get assay
assay <- assay(x, assay.type)
# All the sample pairs
sample_pairs <- as.matrix(expand.grid(colnames(x), colnames(x)))
# Loop through all sample pairs
result <- apply(sample_pairs, 1, FUN = function(sample_pair){
# Get samples
sample1 <- assay[ , sample_pair[1]]
sample2 <- assay[ , sample_pair[2]]
# Calculate overlap
temp_result <- .calculate_overlap(sample1,
sample2,
detection)
})
# Create a matrix from result vector and give name to rownames and colnames
result <- matrix(result, ncol = ncol(assay))
colnames(result) <- colnames(assay)
rownames(result) <- colnames(assay)
# Convert into distances
result <- stats::as.dist(result)
return(result)
}
)
#' @rdname calculateOverlap
#' @export
setGeneric("runOverlap", signature = c("x"),
function(x, ...)
standardGeneric("runOverlap"))
#' @rdname calculateOverlap
#'
#' @param name A single character value specifying the name of overlap matrix that
#' is stored in reducedDim(x).
#'
#' @export
#' @importFrom SingleCellExperiment reducedDim<-
setMethod("runOverlap", signature = c(x = "SummarizedExperiment"),
function(x, name = "overlap", ...){
# Check name
if(!.is_non_empty_string(name)){
stop("'name' must be a non-empty single character value.",
call. = FALSE)
}
# Calculate overlap
mat <- calculateOverlap(x, ...)
# Convert it into matrix so that nrow equals number of samples
mat <- as.matrix(mat)
# Store it to reducedDim
reducedDim(x, name) <- mat
return(x)
}
)
################################ HELP FUNCTIONS ################################
.calculate_overlap <- function (x, y, detection) {
# Take those taxa that have abundance over threshold
inds <- which(x > detection & y > detection)
x <- x[inds]
y <- y[inds]
# Overlap is the average of the sums of the values in each sample
overlap <- (sum(x) + sum(y))/2
return(overlap)
}