-
Notifications
You must be signed in to change notification settings - Fork 1
/
plots-mgheatmap.R
403 lines (383 loc) · 16.3 KB
/
plots-mgheatmap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#' Creates a "geneset smart" ComplexHeatmap::Heatmap
#'
#' @description
#' Before we get started, note that you probably want to use [mgheatmap2()].
#'
#' This function encapsulates many common "moves" you'll make when trying to
#' make a heatmap, especially if you are trying to show geneset activity across
#' a panel of samples.
#'
#' **NOTE**: this function will **almost certainly** reorder the rows of the
#' input matrix. If you are concatentating Heatmap objects together horizontally
#' (ie. you if you want to use a rowAnnotation along side the returned heatmap),
#' you must reorder the rows of the annotation data.frame, ie.
#' `ranno.df <- ranno.df[rownames(out@matrix),]`
#'
#' @details
#' More info here.
#'
#' @section Renaming Heatmap Rows:
#' This function leverages [renameRows()] so that you can better customize the
#' output of your heatmaps by tweaking its rownames.
#'
#' If you are plotting a **gene-level** heatmap (ie. `aggregate.by == "none"``)
#' and the `rownames()` are gene identifiers, but you want the rownames of the
#' heatmap to be gene symbols. You can perform this renaming using the
#' `rename.rows` parameter.
#'
#' * If `rename.rows` is `NULL`, then nothing is done.
#' * If `rename.rows` is a `string`, then we assume that `x` has an associated
#' metadata `data.frame` over its rows and that `rename.rows` names one of
#' its columns, ie. `DGEList$genes[[rename.rows]]` or
#' `fData(ExpressionSet)[[rename.rows]]`. The values in that column will
#' be swapped out for `x`'s rownames
#' * If `rename.rows` is a two-column data.frame, the first column is assumed
#' to be `rownames(x)` and the second is what you want to rename it to.
#' * When there are duplicates in the renamed rownames, the `rename.duplicates`
#' `...` parameter dictates the behavior. This will happen, for instance, if
#' you are trying to rename the rows of an affy matrix to gene symbols, where
#' we have multiple probe ids for one gene. When `rename.duplicates` is set to
#' `"original"`, one of the rows will get the new name, and the remaning
#' duplicate rows will keep the rownames they came in with. When set to
#' `"make.unique"`, the new names will contain `*.1`, `*.2`, etc. suffixes,
#' as you would get from using [base::make.unique()].
#'
#' Maybe you are aggregating the expression scores into geneset scores, and
#' you don't want the rownames of the heatmap to be `collection;;name` (or just
#' `name` when `rm.collection.prefx = TRUE`), you can pass in a two column
#' `data.frame`, where the first column is `collection;name` and the second
#' is the name you want to rename that to. There is an example of this in
#' the "Examples" section here.
#'
#' @export
#' @importFrom ComplexHeatmap Heatmap
#' @importFrom viridis viridis
#' @seealso [mgheatmap2()]
#'
#' @param x the data matrix
#' @param gdb `GeneSetDb` object that holds the genesets to plot. Defaults to
#' `NULL`, which will plot all rows in `x`.
#' @param col a colorRamp(2) function
#' @param aggregate.by the method used to generate single-sample geneset
#' scores. Default is `none` which plots heatmap at the gene level
#' @param split introduce row-segmentation based on genesets or collections?
#' Defaults is `TRUE` which will create split heatmaps based on
#' collection if `aggregate.by != 'none'`, or based on gene sets
#' if `aggregate.by == "none"`.
#' @param scores If `aggregate.by != "none"` you can pass in a precomupted
#' [scoreSingleSamples()] result, otherwise one will be
#' computed internally. Note that if this is a `data.frame` of
#' pre-computed scores, the `gdb` is largely irrelevant (but still
#' required).
#' @param gs.order This is experimental, and is here to help order the order
#' of the genesets (or genesets collection) in a different way than the
#' default. By default, `gs.order = NULL` and genesets are enumerated in
#' alphabetical in the heatmap. You can pass in a character vector that will
#' dictate the order of the genesets displayed in the heatmap. Currently this
#' only matches against the `"name"` value of the geneset and probably only
#' works when `split = TRUE`. We will support `colleciton,name` tuples soon.
#' This can be a superset of the names found in `gdb`. As of ComplexHeatmap
#' v2 (maybe earlier versions), this doesn't really work when
#' `cluster_rows = TRUE`.
#' @param name passed down to [ComplexHeatmap::Heatmap()]
#' @param rm.collection.prefix When `TRUE` (default), removes the collection
#' name from the genesets annotated on the heatmap.
#' @param center,scale boolean parameters passed down into the the single
#' sample gene set scoring methods defined by `aggregate.by`
#' @param rm.dups if `aggregate.by == 'none'`, do we remove genes that
#' appear in more than one geneset? Defaults to `FALSE`
#' @param recenter do you want to mean center the rows of the heatmap matrix
#' prior to calling [ComplexHeatmap::Heatmap()]?
#' @param rescale do you want to standardize the row variance to one on the
#' values of the heatmap matrix prior to calling
#' [ComplexHeatmap::Heatmap()]?
#' @param rename.rows defaults to `NULL`, which induces no action. Specifying
#' a paramter here assumes you want to rename the rows of the heatmap.
#' Please refer to the "Renaming Rows" section for details.
#' @param zero_center_colramp Used to specify the type of color ramp to generate
#' when `col` is `NULL`. By default (`NULL`) we try to guess if we should
#' generate a 0-centered (blue, white, red) color ramp, or an absolute
#' (viridis style) one. The guessing functionality isn't that great, so
#' it doesn't hurt to explicitly set this to `TRUE` or `FALSE`.
#' @param zlim Used to control the color saturation of the heatmap when the
#' `col` parameter is not provided. If `NULL`, (default), extreme values
#' (outside the `c(0.025, 0.975)` quantiles) are axed and the colorRamp is
#' based on the remaining value range. If `FALSE`, the range of the colorRamp
#' is defined by the min/max values. Otherwise a length(2) numeric can be
#' supplied. If the values are between `[0,1]`, then we assume this is a
#' quantile range to be calculated. Otherwise the number are assumed to
#' mark the top and bottom of the color scale range you want to use.
#' @param transpose Flip display so that rows are columns. Default is `FALSE`.
#' @param ... parameters to send down to [scoreSingleSamples()],
#' [ComplexHeatmap::Heatmap()], [renameRows()] internal `as_matrix()`.
#' @return A `Heatmap` object.
#'
#' @examples
#' \donttest{
#' library(ComplexHeatmap)
#' vm <- exampleExpressionSet()
#' gdb <- exampleGeneSetDb()
#' col.anno <- ComplexHeatmap::HeatmapAnnotation(
#' df = vm$targets[, c("Cancer_Status", "PAM50subtype")],
#' col = list(
#' Cancer_Status = c(normal = "grey", tumor = "red"),
#' PAM50subtype = c(Basal = "purple", Her2 = "green", LumA = "orange")))
#' mgh <- mgheatmap(vm, gdb, aggregate.by = "ewm", split=TRUE,
#' top_annotation = col.anno, show_column_names = FALSE,
#' column_title = "Gene Set Activity in BRCA subset")
#'
#' # Maybe you want the rownames of the matrix to use spaces instead of "_"
#' rr <- geneSets(gdb)[, "name", drop = FALSE]
#' rr$newname <- gsub("_", " ", rr$name)
#' mg2 <- mgheatmap(vm, gdb, aggregate.by='ewm', split=TRUE,
#' top_annotation = col.anno, show_column_names = FALSE,
#' column_title = "Gene Set Activity in BRCA subset",
#' rename.rows = rr)
#' }
mgheatmap <- function(x, gdb = NULL, col = NULL,
aggregate.by = c("none", "ewm", "ewz", "zscore"),
split = TRUE, scores = NULL, gs.order = NULL,
name = NULL, rm.collection.prefix = TRUE,
rm.dups = FALSE, recenter = FALSE, rescale = FALSE,
center = TRUE, scale = TRUE,
rename.rows = NULL,
zero_center_colramp = NULL, zlim = NULL,
transpose = FALSE, ...) {
X <- as_matrix(x, ...)
if (is.null(scores)) {
aggregate.by <- match.arg(aggregate.by)
} else {
stopifnot(
is.character(aggregate.by),
length(aggregate.by) == 1L,
aggregate.by %in% scores$method)
}
if (!is.null(gdb)) {
if (!is(gdb, "GeneSetDb")) {
gdb <- GeneSetDb(gdb)
}
}
# split.by <- match.arg(split.by)
drop1.split <- missing(split)
stopifnot(is.logical(split) && length(split) == 1L)
if (!is.null(scores)) stopifnot(is.data.frame(scores))
if (!missing(zlim) && !is.null(zlim)) {
stopifnot(
is.numeric(zlim),
length(zlim) == 2L,
zlim[1] < zlim[2])
}
stopifnot(
ncol(X) > 1L,
!any(is.na(X)))
if (!is.null(scores)) {
}
if (!is.null(gdb)) {
gdbc <- suppressWarnings(conform(gdb, X, ...))
gdbc.df <- as.data.frame(gdbc) # keep only genes that matched in gdb.df
# Order genesets in requested (if any) order
if (!is.null(gs.order)) {
assert_character(gs.order, min.len = 1)
gs.order <- unique(c(gs.order, gdbc.df[["name"]]))
gs.order <- intersect(gs.order, gdbc.df[["name"]])
assert_set_equal(gs.order, gdbc.df[["name"]])
name. <- factor(gdbc.df[["name"]], gs.order)
gdbc.df <- gdbc.df[order(name.),,drop = FALSE]
}
# Set this up so we can order the data.frame in the way requested by user
gdbc.df$key <- encode_gskey(gdbc.df)
}
# What is recenter doing?
# 1. The user can set it to `TRUE` to center all values on the mean of their
# row. (`FALSE` does no centering)
# 2. A (named) vector of values that is a superset of rownames(x). These will
# be the values that are subtracted from each row.
# 3. A logical vector as long as ncol(x). Each value will be centered to the
# mean of the values of the columns specified as TRUE.
# 4. An integer vector, the is the analog of 3 but specifies the columns to
# use for centering.
if (!test_flag(recenter)) {
if (test_logical(recenter) && length(recenter) == ncol(X)) {
# indicator of which columns to calculate mean from and recenter to
recenter <- which(recenter)
}
if (test_integerish(recenter, lower = 1L, upper = ncol(X), unique = TRUE)) {
recenter <- rowMeans(X[, recenter, drop = FALSE])
}
assert_numeric(recenter, min.len = nrow(X), names = "unique")
assert_subset(rownames(X), names(recenter))
recenter <- recenter[rownames(X)]
}
if (!test_flag(center)) {
assert_numeric(center, min.len = nrow(X), names = "unique")
assert_subset(rownames(X), names(center))
center <- center[rownames(X)]
}
if (aggregate.by == "none") {
if (!is.null(gdb)) {
ridx <- if (rm.dups) unique(gdbc.df$feature_id) else gdbc.df$feature_id
# We may have a sparse matrix at this point, turning it to dense for now,
# but need to fix.
X <- X[ridx,,drop=FALSE]
if (is.numeric(recenter)) recenter <- recenter[ridx]
if (is.numeric(center)) center <- center[ridx]
split <- if (split) gdbc.df$key else NULL
}
} else {
if (is.null(scores)) {
if (is.numeric(recenter) &&
isTRUE(all.equal(names(recenter), rownames(X)))) {
# DEBUG: You are making this hard on yourself! You think you know what
# you're doing now, but you'll be crying next time you have to revisit
# this!
X <- X - recenter
center <- FALSE
recenter <- FALSE
}
X <- scoreSingleSamples(gdb, X, methods = aggregate.by, as.matrix=TRUE,
center = center, scale = scale, ...)
} else {
xs <- setDT(scores[scores[['method']] == aggregate.by,,drop=FALSE])
xs[, key:= encode_gskey(xs)]
xw <- dcast(xs, key ~ sample_id, value.var = "score")
xw <- unique(xw, by = "key")
X <- as.matrix(xw[, -1, with = FALSE])
rownames(X) <- xw[[1]]
}
# If we want to split, it (only?) makes sense to split by collection
split <- if (split) split_gskey(rownames(X))$collection else NULL
}
if (!isFALSE(recenter) || !isFALSE(rescale)) {
if (is(X, "sparseMatrix")) {
X <- as.matrix(X)
}
X <- t(scale(t(X), center = recenter, scale = rescale))
isna <- which(is.na(X), arr.ind = TRUE)
if (nrow(isna) > 0L) {
na.rows <- unique(isna[, "row"])
if (length(na.rows) == nrow(X)) {
stop("All rows removed after `scale`")
}
warning(length(na.rows), " features NA'd during `scale`, ",
"these are removed", immediate. = TRUE)
X <- X[-na.rows,,drop = FALSE]
split <- split[-na.rows]
}
}
# What kind of colorscale are we going to use?
# If this is 0-centered ish, we use a red-white-blue scheme, otherwise
# we use viridis.
if (is.null(col)) {
if (is.null(zero_center_colramp)) {
zero_center_colramp <- .looks0centered(X)
}
assert_flag(zero_center_colramp)
if (zero_center_colramp) {
if (is.null(zlim)) {
fpost <- quantile(abs(X), 0.975)
zlim <- c(-fpost, fpost)
} else if (isFALSE(zlim)) {
fpost <- c(min(X), max(X))
}
assert_numeric(zlim, len = 2)
assert_true(zlim[1] < zlim[2])
if (zlim[1L] >= 0 && zlim[2L] <= 1) {
# quantiles
fpost <- quantile(X, zlim)
} else {
fpost <- zlim
}
col <- circlize::colorRamp2(
c(fpost[1L], 0, fpost[2L]),
c('#1F294E', '#F7F7F7', '#6E0F11'))
} else {
if (is.null(zlim)) {
zlim <- quantile(X, c(0.025, 0.975))
} else if (isFALSE(zlim)) {
zlim <- c(min(X), max(X))
}
assert_numeric(zlim, len = 2)
assert_true(zlim[1] < zlim[2])
if (zlim[1L] >= 0 && zlim[2L] <= 1) {
fpost <- quantile(X, zlim)
} else {
fpost <- zlim
}
breaks <- seq(fpost[1], fpost[2], length.out = 21)
col <- circlize::colorRamp2(breaks, viridis::viridis(21))
}
}
stopifnot(is.function(col))
if (drop1.split && !is.null(split) && length(unique(split)) == 1L) {
split <- NULL
}
if (rm.collection.prefix) {
if (aggregate.by != 'none') {
rownames(X) <- split_gskey(rownames(X))$name
} else {
if (!is.null(split)) {
# The order of the splits should be preserved up until this point.
# Since this is our final "look" at the split character vector, let's
# set this as a factor with the levels set in the order of their first
# appearance.
split <- split_gskey(split)$name
split <- factor(split, unique(split))
}
}
}
## Catch Heatmap arguments in `...` and build a list do do.call() them down
## into the function call.
dot.args <- list(...)
hm.args.default <- as.list(formals(Heatmap))
if (is.null(name)) {
name <- if (aggregate.by == 'none') 'value' else 'score'
}
hm.args <- dot.args[intersect(names(dot.args), names(hm.args.default))]
hm.args[['matrix']] <- X
hm.args[['col']] <- col
hm.args[['row_split']] <- split
hm.args[['name']] <- name
row.labels <- rownames(X)
if (!is.null(rename.rows)) {
has.meta <- is(x, "DGEList") ||
is(x, "EList") ||
is(x, "SummarizedExperiment") ||
is(x, "eSet")
is.string <- is.character(rename.rows) && length(rename.rows) == 1L
if (aggregate.by == "none") {
if (has.meta && is.string) {
metadf <- fdata(x, as.df = TRUE)
metadf <- data.frame(rn = rownames(x), to = metadf[[rename.rows]],
stringsAsFactors = FALSE)
if (!is.null(metadf$to)) {
row.labels <- rownames(renameRows(X, xref = metadf, ...))
} else {
warning("rename.rows column not found in metadata for x")
}
} else {
row.labels <- rownames(renameRows(X, rename.rows, ...))
}
} else {
if (!(is.data.frame(rename.rows) && ncol(rename.rows) == 2)) {
warning("rename.rows parameter must be a 2 column data.frame when ",
"aggregate.by != 'none'", immediate. = TRUE)
} else {
if (rm.collection.prefix && any(grepl(";", rename.rows[[1]]))) {
rr <- rename.rows
rr[[1L]] <- sub("^.*;;?", "", rename.rows[[1L]])
rename.rows <- rbind(rename.rows, rr)
}
row.labels <- rownames(renameRows(X, rename.rows, ...))
}
}
}
hm.args[["row_labels"]] <- row.labels
H <- do.call(ComplexHeatmap::Heatmap, hm.args)
H
}
#' @noRd
.looks0centered <- function(x, ...) {
x <- as.vector(x)
qtiles <- quantile(x, c(0.4, 0.6))
qtiles[1L] < 0 && qtiles[2L] > 0 && abs(qtiles[2] - qtiles[1]) < 1
}