-
Notifications
You must be signed in to change notification settings - Fork 25
/
meltAssay.R
272 lines (260 loc) · 10.4 KB
/
meltAssay.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#' Converting a \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' object into a long data.frame
#'
#' \code{meltSE} Converts a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} object into a
#' long data.frame which can be used for \code{tidyverse}-tools.
#'
#' @details
#' If the \code{colData} contains a column \dQuote{SampleID} or the
#' \code{rowData} contains a column \dQuote{FeatureID}, they will be renamed to
#' \dQuote{SampleID_col} and \dQuote{FeatureID_row}, if row names or column
#' names are set.
#'
#' @param x A numeric matrix or a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#'
#' @param assay.type a \code{character} value to select an
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assayNames}}
#'
#' @param assay_name a single \code{character} value for specifying which
#' assay to use for calculation.
#' (Please use \code{assay.type} instead. At some point \code{assay_name}
#' will be disabled.)
#'
#' @param add_col_data \code{NULL}, \code{TRUE} or a \code{character} vector to
#' select information from the \code{colData} to add to the molten assay data.
#' If \code{add_col_data = NULL} no data will be added, if
#' \code{add_col_data = TRUE} all data will be added and if
#' \code{add_col_data} is a \code{character} vector, it will be used to subset
#' to given column names in \code{colData}. (default:
#' \code{add_col_data = NULL})
#'
#' @param add_row_data \code{NULL}, \code{TRUE} or a \code{character} vector to
#' select information from the \code{rowData} to add to the molten assay data.
#' If \code{add_row_data = NULL} no data will be added, if
#' \code{add_row_data = TRUE} all data will be added and if
#' \code{add_row_data} is a \code{character} vector, it will be used to subset
#' to given column names in \code{rowData}. (default:
#' \code{add_row_data = NULL})
#'
#' @param feature_name a \code{character} scalar to use as the output's name
#' for the feature identifier. (default: \code{feature_name = "FeatureID"})
#'
#' @param sample_name a \code{character} scalar to use as the output's name
#' for the sample identifier. (default: \code{sample_name = "SampleID"})
#'
#' @param ... optional arguments:
#' \itemize{
#' \item check_names: A boolean value passed to data.frame function's check.name
#' argument. Determines if sample names are checked that they are syntactically
#' valid variable names and are not duplicated. If they are not, sample names
#' are modified. (default: \code{check_names = TRUE})
#' }
#'
#' @return A \code{tibble} with the molten data. The assay values are given in a
#' column named like the selected assay \code{assay.type}. In addition, a
#' column \dQuote{FeatureID} will contain the rownames, if set, and analogously
#' a column \dQuote{SampleID} with the colnames, if set
#'
#' @name meltSE
#' @author
#' Sudarshan A. Shetty
#'
#' @examples
#' data(GlobalPatterns)
#' molten_tse <- meltSE(GlobalPatterns,
#' assay.type = "counts",
#' add_row_data = TRUE,
#' add_col_data = TRUE
#' )
#' molten_tse
NULL
#' @rdname meltSE
#' @export
setGeneric("meltSE",
signature = "x",
function(x,
assay.type = assay_name, assay_name = "counts",
add_row_data = NULL,
add_col_data = NULL,
feature_name = "FeatureID",
sample_name = "SampleID",
...)
standardGeneric("meltSE")
)
.norm_add_row_data <- function(add_row_data, x, feature_name){
if(is.null(add_row_data)){
return(NULL)
}
if(anyNA(add_row_data)){
stop("'add_row_data' contains NA.", call. = FALSE)
}
cn <- colnames(rowData(x))
if(is.logical(add_row_data) && length(add_row_data) == 1L && add_row_data){
add_row_data <- cn
} else if (isFALSE(all(add_row_data %in% cn))) {
stop("Please provide valid column names with 'add_row_data' matching ",
"those in 'rowData(x)'", call. = FALSE)
}
if(!is.null(rownames(x)) && feature_name %in% add_row_data){
warning("'x' contains a column '",feature_name,"' in its ",
"rowData(), which will ",
"be renamed to '",feature_name,"_row'", call. = FALSE)
}
add_row_data
}
.norm_add_col_data <- function(add_col_data, x, sample_name){
if(is.null(add_col_data)){
return(NULL)
}
if(anyNA(add_col_data)){
stop("'add_col_data' contains NA.", call. = FALSE)
}
cn <- colnames(colData(x))
if(is.logical(add_col_data) && length(add_col_data) == 1L && add_col_data){
add_col_data <- cn
} else if (isFALSE(all(add_col_data %in% cn))) {
stop("Please provide valid column names with 'add_col_data' matching ",
"those in 'colData(x)'", call. = FALSE)
}
if(!is.null(colnames(x)) && sample_name %in% add_col_data){
warning("'x' contains a column '",sample_name,"' in its ",
"colData(), which will ",
"be renamed to '",sample_name,"_col'", call. = FALSE)
}
add_col_data
}
.col_switch_name <- function(name){
paste0(name,"_col")
}
.row_switch_name <- function(name){
paste0(name,"_row")
}
#' @importFrom dplyr mutate select
.format_molten_assay <- function(molten_assay, x,
feature_name,
sample_name){
if(is.null(rownames(x)) &&
.row_switch_name(feature_name) %in% colnames(molten_assay) &&
!anyNA(molten_assay[,.row_switch_name(feature_name)]) &&
!anyDuplicated(rowData(x)[,feature_name])){
molten_assay <- molten_assay %>%
select(!sym(feature_name)) %>%
dplyr::rename(!!sym(feature_name) := !!sym(.row_switch_name(feature_name)))
}
if(is.null(colnames(x)) &&
.col_switch_name(sample_name) %in% colnames(molten_assay) &&
!anyNA(molten_assay[,.col_switch_name(sample_name)]) &&
!anyDuplicated(colData(x)[,sample_name])){
molten_assay %>%
select(!sym(sample_name)) %>%
dplyr::rename(!!sym(sample_name) := !!sym(.col_switch_name(sample_name)))
}
molten_assay %>%
mutate(!!sym(feature_name) := factor(!!sym(feature_name)),
!!sym(sample_name) := factor(!!sym(sample_name)))
}
#' @rdname meltSE
#'
#' @export
setMethod("meltSE", signature = c(x = "SummarizedExperiment"),
function(x,
assay.type = assay_name, assay_name = "counts",
add_row_data = NULL,
add_col_data = NULL,
feature_name = "FeatureID",
sample_name = "SampleID",
...) {
# input check
.check_assay_present(assay.type, x)
if(!.is_a_string(feature_name)){
stop("'feature_name' must be a single non-empty character value.",
call. = FALSE)
}
if(!.is_a_string(sample_name)){
stop("'sample_name' must be a single non-empty character value.",
call. = FALSE)
}
# check if rownames are duplicated, and if they are, modify
if( any(duplicated(rownames(x))) ){
rownames(x) <- make.unique(rownames(x))
warning("rownames(x) included duplicates.",
" rownames(x) are made unique. ",
call. = FALSE)
}
# check selected colnames
add_row_data <- .norm_add_row_data(add_row_data, x, feature_name)
add_col_data <- .norm_add_col_data(add_col_data, x, sample_name)
molten_assay <- .melt_assay(x, assay.type, feature_name, sample_name, ...)
if(!is.null(add_row_data)){
molten_assay <-
.add_row_data_to_molten_assay(molten_assay, x, add_row_data,
feature_name)
}
if(!is.null(add_col_data)){
molten_assay <-
.add_col_data_to_molten_assay(molten_assay, x, add_col_data,
sample_name, ...)
}
.format_molten_assay(molten_assay, x, feature_name, sample_name)
}
)
# Melts assay in SummarizedExperiment
#' @importFrom SummarizedExperiment assay
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr pivot_longer
#' @importFrom rlang sym
.melt_assay <- function(x, assay.type, feature_name, sample_name, check_names = FALSE,...) {
mat <- assay(x, assay.type) %>%
as.matrix()
rownames(mat) <- rownames(x)
colnames(mat) <- colnames(x)
mat %>%
data.frame(check.names = check_names) %>%
rownames_to_column(feature_name) %>%
# SampleID is unique sample id
pivot_longer(!sym(feature_name),
values_to = assay.type,
names_to = sample_name)
}
# Combines molten assay with rowData i.e. taxonomy table.
#' @importFrom SummarizedExperiment rowData
#' @importFrom rlang sym
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr rename
.add_row_data_to_molten_assay <- function(molten_assay, x, add_row_data,
feature_name) {
rd <- SummarizedExperiment::rowData(x)[,add_row_data,drop=FALSE] %>%
data.frame()
if(feature_name %in% add_row_data){
rd <- rd %>%
dplyr::rename(!!sym(.row_switch_name(feature_name)) := !!sym(feature_name))
}
rd <- rd %>%
rownames_to_column(feature_name)
molten_assay %>%
dplyr::left_join(rd, by = feature_name)
}
# Combines molten assay and rowData i.e. taxonomy table with
#' @importFrom SummarizedExperiment colData
#' @importFrom rlang sym
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr rename
.add_col_data_to_molten_assay <- function(molten_assay, x, add_col_data,
sample_name, check_names = FALSE,...) {
cd <- SummarizedExperiment::colData(x)[,add_col_data,drop=FALSE] %>%
data.frame()
# This makes sure that sample names match
if(check_names == TRUE){
rownames(cd) <- make.names(rownames(cd))
}
if(sample_name %in% add_col_data){
cd <- cd %>%
dplyr::rename(!!sym(.col_switch_name(sample_name)) := !!sym(sample_name))
}
cd <- cd %>%
rownames_to_column(sample_name)
molten_assay %>%
dplyr::left_join(cd, by = sample_name)
}