/
colormapMiss.R
338 lines (335 loc) · 14.2 KB
/
colormapMiss.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# ---------------------------------------
# Author: Andreas Alfons, Bernd Prantner
# and Daniel Schopfhauser
# Vienna University of Technology
# ---------------------------------------
#' Colored map with information about missing/imputed values
#'
#' Colored map in which the proportion or amount of missing/imputed values in
#' each region is coded according to a continuous or discrete color scheme.
#' The sequential color palette may thereby be computed in the *HCL* or
#' the *RGB* color space.
#'
#' The proportion or amount of missing/imputed values in `x` of each
#' region is coded according to a continuous or discrete color scheme in the
#' color range defined by `col`. In addition, the proportions or numbers
#' can be shown as labels in the regions.
#'
#' If `interactive` is `TRUE`, clicking in a region displays more
#' detailed information about missing/imputed values on the console. Clicking
#' outside the borders quits the interactive session.
#'
#' @rdname colormapMiss
#' @aliases colormapMiss colormapMissLegend
#' @param x a numeric vector.
#' @param region a vector or factor of the same length as `x` giving the
#' regions.
#' @param map an object of any class that contains polygons and provides its
#' own plot method (e.g., `"SpatialPolygons"` from package `sp`).
#' @param imp_index a logical-vector indicating which values of \sQuote{x} have
#' been imputed. If given, it is used for highlighting and the colors are
#' adjusted according to the given colors for imputed variables (see
#' `col`).
#' @param prop a logical indicating whether the proportion of missing/imputed
#' values should be used rather than the total amount.
#' @param polysRegion a numeric vector specifying the region that each polygon
#' belongs to.
#' @param range a numeric vector of length two specifying the range (minimum
#' and maximum) of the proportion or amount of missing/imputed values to be
#' used for the color scheme.
#' @param n for `colormapMiss`, the number of equally spaced cut-off
#' points for a discretized color scheme. If this is not a positive integer, a
#' continuous color scheme is used (the default). In the latter case, the
#' number of rectangles to be drawn in the legend can be specified in
#' `colormapMissLegend`. A reasonably large number makes it appear
#' continuously.
#' @param col the color range (start end end) to be used. RGB colors may be
#' specified as character strings or as objects of class
#' "[colorspace::RGB()]". HCL colors need to be specified as objects
#' of class "[colorspace::polarLUV()]". If only one color is
#' supplied, it is used as end color, while the start color is taken to be
#' transparent for RGB or white for HCL.
#' @param gamma numeric; the display *gamma* value (see
#' [colorspace::hex()]).
#' @param fixup a logical indicating whether the colors should be corrected to
#' valid RGB values (see [colorspace::hex()]).
#' @param coords a matrix or `data.frame` with two columns giving the
#' coordinates for the labels.
#' @param numbers a logical indicating whether the corresponding proportions or
#' numbers of missing/imputed values should be used as labels for the regions.
#' @param digits the number of digits to be used in the labels (in case of
#' proportions).
#' @param cex.numbers the character expansion factor to be used for the labels.
#' @param col.numbers the color to be used for the labels.
#' @param legend a logical indicating whether a legend should be plotted.
#' @param interactive a logical indicating whether more detailed information
#' about missing/imputed values should be displayed interactively (see
#' \sQuote{Details}).
#' @param xleft left *x* position of the legend.
#' @param ybottom bottom *y* position of the legend.
#' @param xright right *x* position of the legend.
#' @param ytop top *y* position of the legend.
#' @param cmap a list as returned by `colormapMiss` that contains the
#' required information for the legend.
#' @param horizontal a logical indicating whether the legend should be drawn
#' horizontally or vertically.
#' @param \dots further arguments to be passed to `plot`.
#' @return `colormapMiss` returns a list with the following components:
#' - nmiss a numeric vector containing the number of missing/imputed
#' values in each region.
#' - nobs a numeric vector containing the number of observations in
#' each region.
#' - pmiss a numeric vector containing the proportion of missing
#' values in each region.
#' - prop a logical indicating whether the proportion of
#' missing/imputed values have been used rather than the total amount.
#' - range the range of the proportion or amount of missing/imputed
#' values corresponding to the color range.
#' - n either a positive integer giving the number of equally spaced
#' cut-off points for a discretized color scheme, or `NULL` for a
#' continuous color scheme.
#' - start the start color of the color scheme.
#' - end the end color of the color scheme.
#' - space a character string giving the color space (either
#' `"rgb"` for RGB colors or `"hcl"` for HCL colors).
#' - gamma numeric; the display *gamma* value (see
#' [colorspace::hex()]).
#' - fixup a logical indicating whether the colors have been
#' corrected to valid RGB values (see [colorspace::hex()]).
#' @note Some of the argument names and positions have changed with versions
#' 1.3 and 1.4 due to extended functionality and for more consistency with
#' other plot functions in `VIM`. For back compatibility, the arguments
#' `cex.text` and `col.text` can still be supplied to \code{\dots{}}
#' and are handled correctly. Nevertheless, they are deprecated and no longer
#' documented. Use `cex.numbers` and `col.numbers` instead.
#' @author Andreas Alfons, modifications to show imputed values by Bernd
#' Prantner
#' @seealso [colSequence()], [growdotMiss()],
#' [mapMiss()]
#' @references M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete
#' data using visualization tools. *Journal of Advances in Data Analysis
#' and Classification*, Online first. DOI: 10.1007/s11634-011-0102-y.
#' @keywords hplot
#' @export
colormapMiss <- function(x, region, map, imp_index = NULL,
prop = TRUE, polysRegion = 1:length(x), range = NULL,
n = NULL, col = c("red","orange"),
gamma = 2.2, fixup = TRUE, coords = NULL,
numbers = TRUE, digits = 2, cex.numbers = 0.8,
col.numbers = par("fg"), legend = TRUE,
interactive = TRUE, ...) {
check_data(x)
x <- as.data.frame(x)
# back compatibility
dots <- list(...)
if(missing(cex.numbers) && "cex.text" %in% names(dots)) {
cex.numbers <- dots$cex.text
}
if(missing(col.numbers) && "col.text" %in% names(dots)) {
col.numbers <- dots$col.text
}
# initializations
imputed <- FALSE
if(!is.null(imp_index)) {
if(any(is.na(x))) {
imputed <- FALSE
warning("'imp_index' is given, but there are missing values in 'x'! 'imp_index' will be ignored.", call. = FALSE)
} else {
if(is.numeric(imp_index) && range(imp_index) == c(0,1)) imp_index <- as.logical(imp_index)
else if(!is.logical(imp_index)) stop("The missing-index of the imputed Variable must be of the type logical")
imputed <- TRUE
}
}
x <- as.vector(x)
region <- as.factor(region)
if(!is.null(coords)) { # error messages
if(!(inherits(coords, c("data.frame","matrix"))))
stop("'coords' must be a data.frame or matrix")
if(ncol(coords) != 2) stop("'coords' must be 2-dimensional")
}
if(is.character(map)) map <- get(map, envir=.GlobalEnv)
prop <- isTRUE(prop)
# check colors
if(!is(col, "RGB") && !is(col, "polarLUV") &&
(!is.character(col) || length(col) == 0 || col == c("red","orange"))) {
if(!imputed) col <- "red"
else col <- "orange"
}
if(is.character(col)) {
# colors given as character string
if(length(col) == 1) {
start <- par("bg")
end <- col
} else {
start <- col[1]
end <- col[2]
}
space <- "rgb"
} else {
space <- if(is(col, "RGB")) "rgb" else "hcl"
if(nrow(coords(col)) == 1) {
if(is(col, "RGB")) {
# RGB colors
start <- par("bg")
} else {
# HCL colors
start <- polarLUV(0, 0, col@coords[1, "H"])
}
end <- col
} else {
start <- col[1,]
end <- col[2,]
}
}
# compute number and proportions of missing values
if(!imputed) nmiss <- tapply(x, list(region), countNA)
else {
getImp <- function(x) length(which(x))
nmiss <- tapply(unlist(imp_index), list(region), getImp)
}
nobs <- tapply(x, list(region), length)
pmiss <- 100*nmiss/nobs
# check breakpoints
if(is.null(range)) {
range <- c(0, if(prop) ceiling(max(pmiss)) else max(nmiss))
} else {
# TODO: check 'range'
}
# get colors for regions
n <- rep(n, length.out=1)
if(isTRUE(n > 1)) {
# equally spaced categories
breaks <- seq(range[1], range[2], length=n+1)
cat <- cut(if(prop) pmiss else nmiss, breaks,
labels=FALSE, include.lowest=TRUE)
pcol <- seq(0, 1, length=n)
cols <- colSequence(pcol, start, end, space, gamma=gamma, fixup=fixup)
cols <- cols[cat]
} else {
# continuous color scheme
n <- NULL
pcol <- if(prop) pmiss else nmiss
pcol <- (pcol - range[1])/diff(range)
cols <- colSequence(pcol, start, end, space, gamma=gamma, fixup=fixup)
}
cols <- cols[polysRegion]
localPlot <- function(..., cex.text, col.text) plot(...)
localPlot(map, col=cols, ...)
if(isTRUE(numbers)) {
# number or percentage of missings as labels for regions
if(is.null(coords)) coords <- coordinates(map)
labs <- if(prop) paste(round(pmiss, digits), "%", sep="") else nmiss
plabs <- labs[polysRegion]
plabs[duplicated(polysRegion)] <- ""
text(coords, labels=plabs, cex=cex.numbers, col=col.numbers)
}
# useful statistics for legend
cmap <- list(nmiss=nmiss, nobs=nobs, pmiss=pmiss, prop=prop, range=range,
n=n, start=start, end=end, space=space, gamma=gamma, fixup=fixup)
if(isTRUE(legend)) {
usr <- par("usr")
xrange <- usr[1:2]
xdiff <- usr[2] - usr[1]
yrange <- usr[3:4]
ydiff <- usr[4] - usr[3]
length <- 1/3
height <- 0.1*length
xleft <- xrange[1] + 0.02*xdiff
xright <- xleft + length*xdiff
ytop <- yrange[2] - 0.02*ydiff
ybottom <- ytop - height*ydiff
colormapMissLegend(xleft, ybottom, xright, ytop,
cmap, cex.numbers=cex.numbers, col.numbers=col.numbers)
}
if(isTRUE(interactive)) {
cat("Click on a region to get more information about missings.\n")
cat("To regain use of the R console, click outside the borders.\n")
p <- locatorVIM()
while(!is.null(p)) {
p <- SpatialPoints(matrix(unlist(p), ncol=2))
poly <- over(p, map)
ind <- polysRegion[poly]
if(!is.na(ind)) {
if(!imputed) label <- "missings"
else label <- "imputed missings"
cat(paste("\n ", levels(region)[ind], ":", sep=""))
cat(paste("\n Number of ", label, ": ", nmiss[ind]))
cat(paste("\n Number of observations:", nobs[ind]))
cat(paste("\n Proportion of ", label, ": ",
round(pmiss[ind], digits), "%\n", sep=""))
p <- locatorVIM()
} else p <- NULL
}
}
# return statistics invisibly
invisible(cmap)
}
## legend
#' @export colormapMissLegend
#' @rdname colormapMiss
colormapMissLegend <- function(xleft, ybottom, xright, ytop, cmap,
# range, prop = FALSE, col = "red",
n = 1000, horizontal = TRUE, digits = 2,
cex.numbers = 0.8, col.numbers = par("fg"),
...) {
# back compatibility
dots <- list(...)
dn <- names(dots)
if(missing(cmap)) {
if("range" %in% dn) range <- dots$range
else stop("argument 'range' is missing, with no default")
prop <- if("prop" %in% dn) dots$prop else FALSE
col <- if("col" %in% dn) dots$col else "red"
cmap <- list(prop=prop, range=range, n=NULL, start=par("bg"),
end=col, space="rgb", gamma=2.4, fixup=TRUE)
}
if(missing(cex.numbers) && "cex.text" %in% dn) cex.numbers <- dots$cex.text
if(missing(col.numbers) && "col.text" %in% dn) col.numbers <- dots$col.text
# initializations
prop <- isTRUE(cmap$prop)
range <- cmap$range
cont <- is.null(cmap$n) # is legend for continuous color scheme?
n <- if(cont) n else cmap$n
n <- rep(n, length.out=1)
# allow to plot legend outside plot region
op <- par(xpd=TRUE)
on.exit(par(op))
# compute steps for legend
length <- xright - xleft
height <- ytop - ybottom
# compute colors for legend
col <- colSequence(seq(0, 1, length=n), cmap$start, cmap$end,
cmap$space, gamma=cmap$gamma, fixup=cmap$fixup)
# compute grid and position of legend
grid <- seq(0, 1, length=n+1)
if(cont) {
pos <- 0:1
ann <- range
} else {
pos <- grid
ann <- seq(range[1], range[2], length=n+1)
}
ann <- if(prop) paste(format(ann, digits), "%", sep="") else ann
# plot legend
# TODO: check space for labels
if(horizontal) {
grid <- grid*length + xleft
if(cont) {
rect(grid[-(n+1)], ybottom, grid[-1], ytop, col=col, border=NA)
rect(xleft, ybottom, xright, ytop, border=NULL)
} else rect(grid[-(n+1)], ybottom, grid[-1], ytop, col=col, border=NULL)
pos <- pos*length + xleft
text(pos, ybottom-0.25*height, labels=ann,
adj=c(0.5,1), cex=cex.numbers, col=col.numbers)
} else {
grid <- grid*height + ybottom
if(cont) {
rect(xleft, grid[-(n+1)], xright, grid[-1], col=col, border=NA)
rect(xleft, ybottom, xright, ytop, border=NULL)
} else rect(xleft, grid[-(n+1)], xright, grid[-1], col=col, border=NULL)
pos <- pos*height + ybottom
text(xright+0.25*length, pos, labels=ann,
adj=c(0,0.5), cex=cex.numbers, col=col.numbers)
}
invisible()
}