-
Notifications
You must be signed in to change notification settings - Fork 25
/
runDPCoA.R
217 lines (210 loc) · 7.13 KB
/
runDPCoA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#' Calculation of Double Principal Correspondance analysis
#'
#' Double Principal Correspondance analysis is made available via the
#' \code{ade4} package in typical fashion. Results are stored in the
#' \code{reducedDims} and are available for all the expected functions.
#'
#' @param x For \code{getDPCoA}, a numeric matrix of expression values
#' where rows are features and columns are cells.
#' Alternatively, a \code{TreeSummarizedExperiment} containing such a matrix.
#'
#' For \code{addDPCoA} a \linkS4class{TreeSummarizedExperiment} containing the
#' expression values as well as a \code{rowTree} to calculate \code{y} using
#' \code{\link[ape:cophenetic.phylo]{cophenetic.phylo}}.
#'
#' @param y a \code{dist} or a symmetric \code{matrix} compatible with
#' \code{ade4:dpcoa}
#'
#' @param ncomponents Numeric scalar indicating the number of DPCoA dimensions
#' to obtain.
#'
#' @param ntop Numeric scalar specifying the number of features with the highest
#' variances to use for dimensionality reduction. Alternatively \code{NULL},
#' if all features should be used. (default: \code{ntop = NULL})
#'
#' @param subset_row Vector specifying the subset of features to use for
#' dimensionality reduction. This can be a character vector of row names, an
#' integer vector of row indices or a logical vector.
#'
#' @param scale Logical scalar, should the expression values be standardized?
#'
#' @param transposed Logical scalar, is x transposed with cells in rows?
#'
#' @param assay.type a single \code{character} value for specifying which
#' assay to use for calculation.
#'
#' @param exprs_values a single \code{character} value for specifying which
#' assay to use for calculation.
#' (Please use \code{assay.type} instead.)
#'
#' @param assay_name a single \code{character} value for specifying which
#' assay to use for calculation.
#' (Please use \code{assay.type} instead. At some point \code{assay_name}
#' will be disabled.)
#'
#' @param tree_name a single \code{character} value for specifying which
#' rowTree will be used in calculation.
#' (By default: \code{tree_name = "phylo"})
#'
#' @param altexp String or integer scalar specifying an alternative experiment
#' containing the input data.
#'
#' @param name String specifying the name to be used to store the result in the
#' reducedDims of the output.
#'
#' @param ... Currently not used.
#'
#' @details
#' In addition to the reduced dimension on the features, the reduced dimension
#' for samples are returned as well as \code{sample_red} attribute.
#' \code{eig}, \code{feature_weights} and \code{sample_weights} are
#' returned as attributes as well.
#'
#' @returns
#' For \code{getDPCoA} a matrix with samples as rows and CCA dimensions as
#' columns
#'
#' For \code{addDPCoA} a modified \code{x} with the results stored in
#' \code{reducedDim} as the given \code{name}
#'
#'
#' @name runDPCoA
#' @seealso
#' \code{\link[scater:plotReducedDim]{plotReducedDim}}
#' \code{\link[SingleCellExperiment:reducedDims]{reducedDims}}
#'
#' @examples
#' data(esophagus)
#' dpcoa <- getDPCoA(esophagus)
#' head(dpcoa)
#'
#' esophagus <- addDPCoA(esophagus)
#' reducedDims(esophagus)
#'
#' library(scater)
#' plotReducedDim(esophagus, "DPCoA")
NULL
#' @export
#' @rdname runDPCoA
setGeneric("getDPCoA", signature = c("x", "y"),
function(x, y, ...)
standardGeneric("getDPCoA"))
.calculate_dpcoa <- function(x, y, ncomponents = 2, ntop = NULL,
subset_row = NULL, scale = FALSE,
transposed = FALSE, ...)
{
.require_package("ade4")
# input check
# Check ncomponents
if( !(.is_an_integer(ncomponents) && ncomponents > 0) ){
stop("'ncomponents' must be a single integer value specifying the number ",
"of DPCoA dimensions.", call. = FALSE)
}
# Check ntop
if( !(is.null(ntop) || (.is_an_integer(ntop) && ntop > 0)) ){
stop("'ntop' must be NULL or a single integer value specifying the number ",
"of features with the highest variance.", call. = FALSE)
}
y <- as.matrix(y)
if(length(unique(dim(y))) != 1L){
stop("'y' must be symmetric.", call. = FALSE)
}
#
# Get NAs. ade4:dpcoa lead to an error if there are any NAs
if( any( is.na(x) ) ){
stop("'x' includes NAs. Please try to convert them into numeric values.",
call. = FALSE)
}
if(!transposed) {
if(is.null(ntop)){
ntop <- nrow(x)
}
x <- .get_mat_for_reddim(x, subset_row = subset_row, ntop = ntop,
scale = scale)
}
y <- y[colnames(x), colnames(x), drop = FALSE]
if(nrow(y) != ncol(x)){
stop("x and y must have corresponding dimensions.", call. = FALSE)
}
y <- sqrt(y)
y <- as.dist(y)
#
dpcoa <- ade4::dpcoa(data.frame(x), y, scannf = FALSE, nf = ncomponents)
ans <- as.matrix(dpcoa$li)
rownames(ans) <- rownames(x)
colnames(ans) <- NULL
attr(ans,"eig") <- dpcoa$eig
tmp <- as.matrix(dpcoa$dls)
rownames(tmp) <- colnames(x)
colnames(tmp) <- NULL
attr(ans,"sample_red") <- tmp
attr(ans,"feature_weights") <- unname(dpcoa$dw)
attr(ans,"sample_weights") <- unname(dpcoa$lw)
ans
}
#' @export
#' @rdname runDPCoA
setMethod("getDPCoA", c("ANY","ANY"), .calculate_dpcoa)
#' @export
#' @importFrom ape cophenetic.phylo
#' @rdname runDPCoA
setMethod("getDPCoA", signature = c("TreeSummarizedExperiment","missing"),
function(x, ..., assay.type = assay_name, assay_name = exprs_values,
exprs_values = "counts", tree_name = "phylo")
{
.require_package("ade4")
# Check assay.type
.check_assay_present(assay.type, x)
# Check tree_name
.check_rowTree_present(tree_name, x)
#
# Get tree
tree <- rowTree(x, tree_name)
# Select only those features that are in the rowTree
whichTree <- rowLinks(x)[ , "whichTree"] == tree_name
if( any(!whichTree) ){
warning("Not all rows were present in the rowTree specified by 'tree_name'.",
"'x' is subsetted.", call. = FALSE)
# Subset the data
x <- x[ whichTree, ]
}
dist <- cophenetic.phylo(tree)
# Get assay
mat <- assay(x, assay.type)
getDPCoA(mat, dist, ...)
}
)
#' @export
#' @rdname runDPCoA
#' @aliases getDPCoA
calculateDPCoA <- function(x,...){
getDPCoA(x,...)
}
#' @export
#' @rdname runDPCoA
#' @importFrom SingleCellExperiment reducedDim<-
addDPCoA <- function(x, ..., altexp = NULL, name = "DPCoA"){
# Input check
# Check and get altExp if altexp is not NULL
if( !is.null(altexp) ){
# Check altexp
.check_altExp_present(altexp, x)
# Get altExp
y <- altExp(x, altexp)
} else {
y <- x
}
# Check name
if( !.is_a_string(name) ){
stop("'name' must be a single character value specifying a name of ",
"reducedDim where the result will be stored.", call. = FALSE)
}
reducedDim(x, name) <- getDPCoA(y, ...)
x
}
#' @export
#' @rdname runDPCoA
#' @aliases addDPCoA
runDPCoA <- function(x,...){
addDPCoA(x,...)
}