-
Notifications
You must be signed in to change notification settings - Fork 0
/
plots.R
353 lines (338 loc) · 13.2 KB
/
plots.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
#' Box and Whiskers/Jittered Dot Plot
#'
#' @import ggplot2
#'
#' @description Plots categorical x-axis and continuous y-axis.
#' Inspired by Nick Cox's Stata plug-in \code{stripplot}. As per
#' \code{\link[ggplot2]{geom_boxplot}}, boxes reach from the first to the
#' third quartile; whiskers extend 1.5 times the interquartile range
#' but not beyond the most extreme data point.
#'
#' @param data Data frame. Required.
#' @param x Categorical variable for x-axis. Required.
#' @param y Continuous variable for y-axis. Required.
#' @param contrast If added, the mean difference between extreme categories
#' will be added. \code{contrast} provides the preceding label. See example.
#' Defaults to \code{NULL} (do not show contrast).
#' @param unit Scale to print after the point estimate for the
#' contrast between extreme categories.
#' Defaults to \code{NULL} (no unit).
#' @param digits Number of digits for rounding point estimate and
#' confidence intervals of the contrast. Defaults to 2.
#' @param jitter Avoid overplotting of points with similar values?
#' Defaults to \code{TRUE}.
#' @param color Variable to color data points by. Defaults to \code{NULL} (none).
#' @param na.rm Remove "NA" category from x-axis?
#' Defaults to \code{FALSE}.
#' @param printplot \code{print()} the plot? Defaults to \code{FALSE},
#' i.e., just return the plot.
#'
#' @return ggplot object, or nothing
#' (if plot is sent to graphics device with \code{printplot = TRUE}).
#' Standard customization options for a \code{ggplot} object can be added
#' on afterwards; see example.
#' @export
#'
#' @examples
#' data(mtcars)
#'
#' # Basic plot
#' mtcars %>%
#' stripplot(x = gear, y = mpg)
#'
#' # Add mean difference between extreme categories, reduce digits,
#' # add color by 'wt', add different color scale, and label,
#' # all using standard ggplot syntax.
#' mtcars %>%
#' stripplot(x = gear, y = mpg,
#' contrast = "5 vs. 3 gears", unit = "mpg\n",
#' digits = 1, color = wt) +
#' viridis::scale_color_viridis(option = "cividis") +
#' ggplot2::labs(y = "Miles per gallon", color = "Weight")
stripplot <- function(data, x, y,
contrast = NULL,
unit = NULL,
digits = 2,
jitter = TRUE,
color = NULL,
na.rm = FALSE,
printplot = FALSE) {
x <- names(data %>% dplyr::select({{ x }}))
y <- names(data %>% dplyr::select({{ y }}))
color <- names(data %>% dplyr::select({{ color }}))
xlbl <- labelled::var_label(dplyr::pull(data, x))
ylbl <- labelled::var_label(dplyr::pull(data, y))
if(is.null(xlbl))
xlbl <- x
if(is.null(ylbl))
ylbl <- y
if(na.rm == TRUE) {
data <- data %>%
dplyr::filter(!is.na(get(x)))
}
myplot <- ggplot2::ggplot(data = data,
mapping = ggplot2::aes(x = factor(get(x)),
y = get(y))) +
ggplot2::geom_boxplot(outlier.shape = NA) +
labs(x = xlbl, y = ylbl) +
cowplot::theme_minimal_hgrid() +
theme(axis.line.x = element_blank(),
axis.ticks.x = element_blank())
if (jitter == TRUE) {
set.seed(3457)
if(length(color) == 0)
myplot <- myplot + ggplot2::geom_jitter(height = 0, width = 0.2)
else
myplot <- myplot + ggplot2::geom_jitter(height = 0, width = 0.2,
mapping = ggplot2::aes(color = get(color)))
} else {
if(length(color) == 0)
myplot <- myplot + ggplot2::geom_point()
else
myplot <- myplot + ggplot2::geom_point(mapping = ggplot2::aes(color = get(color)))
}
if(!is.null(contrast)) {
fit <- stats::lm(formula = yvar ~ xvar,
data = data %>%
dplyr::rename(xvar = dplyr::one_of(x),
yvar = dplyr::one_of(y)) %>%
dplyr::mutate(xvar = as.factor(.data$xvar)))
estlab <- paste0(
contrast,
", ",
format(round(utils::tail(stats::coef(fit), n = 1),
digits = digits), nsmall = digits),
" ",
if_else(is.na(unit), "", paste0(unit, " ")),
"(95% CI, ",
format(round(utils::tail(stats::confint.default(fit), n = 1)[1],
digits = digits), nsmall = digits),
" to ",
format(round(utils::tail(stats::confint.default(fit), n = 1)[2],
digits = digits), nsmall = digits),
")")
myplot <- myplot + ggplot2::annotate(geom = "text", label = estlab,
x = Inf, y = Inf, hjust = 1, vjust = 1)
}
if(printplot == TRUE)
print(myplot)
else
return(myplot)
}
#' Correlation plot
#'
#' @description Performs pairwise Pearson or Spearman correlations.
#'
#' @param data Required. Data frame. Only numeric variables will be used;
#' variables of other types will silently be dropped. To include
#' categorical variables, coerce factors to numeric first. See examples.
#' @param ... Optional. Variables to correlate. If not provided,
#' all numeric variables will be used. Supports tidy evaluation;
#' see examples.
#' @param method Optional. Correlation method. Defaults to \code{"pearson"}.
#' @param use How \code{\link[stats]{cor}} handles
#' missing values. Defaults to a restriction to complete
#' observations with all variables. See parameter \code{use} of
#' \code{\link[stats]{cor}} for alternatives.
#' @param reorder Perform hierarchical clustering in the correlation matrix?
#' This will order variables by their correlation patterns with other
#' variables. If turned off to \code{FALSE}, the order of variables in the
#' dataset will be retained. Defaults to \code{TRUE}.
#' @param digits Decimal digits for displayed correlation coefficients.
#' Defaults to \code{2}.
#' @param fontsize Size for text in boxes and to the left of the boxes ("y
#' axis"). Defaults to \code{4}.
#' @param legendpos (x, y) coordinates of color legend. Use
#' \code{legendpos = "none"} to turn off the legend.
#' Defaults to \code{c(0.15, 0.35)}.
#' @param cutpoints Correlation coefficient values that have a distinct
#' color. Defaults to \code{c(-1, 0, 1)}.
#' @param colors Colors for the \code{cutpoints}. Defaults to blue (for
#' negative), white (no correlation), and yellow (positive correlation) on the
#' \code{cividis} color scale.
#'
#' @return ggplot. Can be modified with the usual ggplot commands, such as
#' \code{\link[ggplot2]{theme}}.
#' @export
#'
#' @seealso \url{http://www.sthda.com/english/wiki/ggplot2-quick-correlation-matrix-heatmap-r-software-and-data-visualization}
#' @examples
#' data(mtcars)
#'
#' mtcars %>%
#' corrmat(mpg, cyl, hp, wt, qsec)
#'
#' # Can use tidy evaluation to select variables:
#' mtcars %>%
#' corrmat(contains("a"), starts_with("c"))
#'
#' # If "cyl" was a character, it would be excluded:
#' mtcars %>%
#' dplyr::mutate(cyl_chr = as.character(cyl)) %>%
#' corrmat(mpg, cyl_chr, hp, wt, qsec)
#'
#' # To retain the character variable "cyl",
#' # convert to factor and then make numeric:
#' mtcars %>%
#' dplyr::mutate(cyl_chr = as.character(cyl),
#' cyl_chr = as.numeric(factor(cyl_chr))) %>%
#' corrmat(mpg, cyl_chr, hp, wt, qsec)
corrmat <- function(
data,
...,
method = "pearson",
use = "pairwise.complete.obs",
reorder = TRUE,
digits = 2,
fontsize = 4,
legendpos = c(0.15, 0.35),
cutpoints = c(-1, 0, 1),
# for lowest value of scale:
colors = c(viridis::viridis_pal(option = "cividis")(10)[1],
"white", # middle
viridis::viridis_pal(option = "cividis")(10)[8])) { # highest
# matrix reorder by hierarchical clustering (optional)
reorder_cormat <- function(cormat) {
# Use correlation between variables as distance
dd <- stats::as.dist((1 - cormat) / 2)
hc <- stats::hclust(dd)
cormat <- cormat[hc$order, hc$order]
}
# Get upper triangle of the correlation matrix
get_upper_triangle <- function(cormat) {
cormat[base::lower.tri(cormat)] <- NA
return(cormat)
}
newdata <- data %>% select(!!!rlang::enquos(...))
if(ncol(newdata) == 0)
newdata <- data
data <- newdata
mymat <- data %>%
dplyr::select_if(is.numeric) %>%
stats::cor(method = method, use = use)
if(reorder == TRUE)
mymat <- mymat %>% reorder_cormat()
mymat %>%
get_upper_triangle() %>%
tibble::as_tibble(rownames = "var1") %>%
tidyr::pivot_longer(cols = -.data$var1, names_to = "var2",
values_to = "value",
values_drop_na = TRUE) %>%
dplyr::mutate_at(.vars = vars(.data$var1, .data$var2),
.funs = ~forcats::fct_inorder(factor(.))) %>%
dplyr::group_by(.data$var1) %>%
dplyr::mutate(ylabel = dplyr::if_else(dplyr::row_number() == 1,
true = as.character(.data$var1),
false = "")) %>%
dplyr::ungroup() %>%
ggplot2::ggplot(mapping = aes(.data$var2, .data$var1, fill = .data$value)) +
ggplot2::geom_tile(color = "white") +
ggplot2::scale_fill_gradient2(
low = colors[1],
high = colors[3],
mid = colors[2],
midpoint = cutpoints[2],
limit = c(cutpoints[1], cutpoints[3]),
space = "Lab",
name = stringr::str_to_title(paste(method,
"correlation", sep = "\n"))) +
ggplot2::coord_fixed(clip = "off") +
ggplot2::geom_text(aes(x = .data$var2, y = .data$var1,
label = format(round(.data$value, digits = digits),
nsmall = digits)),
color = "black",
size = fontsize) +
ggplot2::geom_text(mapping = aes(x = as.numeric(.data$var2) - 0.7,
y = as.numeric(.data$var1),
label = .data$ylabel),
size = fontsize,
hjust = 1) +
ggplot2::scale_x_discrete(expand = c(0, 0)) +
ggplot2::scale_y_discrete(expand = c(0, 0)) +
ggplot2::theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1),
axis.text.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
legend.position = legendpos,
plot.margin = margin(t = 0, r = 0, b = 0, l = 2,
unit = "cm"),
legend.direction = "vertical",
legend.justification = c(1, 0)) +
ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = 1, barheight = 5,
title.position = "top",
title.hjust = 0.5))
}
#' Step ribbon plots
#'
#' \code{geom_stepribbon} is an extension of the \code{geom_ribbon}, and
#' is optimized for survival function plots with pointwise confidence intervals
#' or differences between survival curves, such as for
#' \code{\link[khsmisc]{estimate_rmtl}}.
#'
#' @seealso
#' \code{\link[ggplot2]{geom_ribbon}}
#'
#' \code{geom_stepribbon} inherits from \code{geom_ribbon}.
#'
#' Original code:
#' \url{https://github.com/cran/RcmdrPlugin.KMggplot2/blob/master/R/geom-stepribbon.r}
#'
#' Modified code (used here):
#' \url{https://github.com/adibender/ldatools/blob/master/R/geom_stepribbon.R}
#'
#' @inheritParams ggplot2::geom_ribbon
#' @examples
#' library(ggplot2)
#' huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
#' h <- ggplot(huron, aes(year))
#' # Step ribbon
#' h + geom_stepribbon(aes(ymin = level - 1, ymax = level + 1), fill = "grey70") +
#' geom_step(aes(y = level))
#'
#' # For comparison, simple ribbon
#' h + geom_ribbon(aes(ymin = level - 1, ymax = level + 1), fill = "grey70") +
#' geom_line(aes(y = level))
#' @rdname geom_stepribbon
#' @importFrom ggplot2 layer GeomRibbon
#' @export
geom_stepribbon <- function(
mapping = NULL,
data = NULL,
stat = "identity",
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomStepribbon,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ... )
)
}
#' @rdname geom_stepribbon
#' @format NULL
#' @usage NULL
#' @export
GeomStepribbon <- ggproto(
"GeomStepribbon", GeomRibbon,
extra_params = c("na.rm"),
draw_group = function(data, panel_scales, coord, na.rm = FALSE) {
if (na.rm) data <- data[complete.cases(data[c("x", "ymin", "ymax")]), ]
data <- rbind(data, data)
data <- data[order(data$x), ]
data$x <- c(data$x[2:nrow(data)], NA)
data <- data[complete.cases(data["x"]), ]
GeomRibbon$draw_group(data, panel_scales, coord, na.rm = FALSE)
}
)