Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: basic outline of base version of repel #157

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ Authors@R: c(
person("Amir Masoud", "Abdol", role = "ctb"),
person("Malcolm", "Barrett", role = "ctb", comment = c(ORCID = "0000-0003-0299-5825")),
person("Robrecht", "Cannoodt", role = "ctb", comment = c(ORCID = "0000-0003-3641-729X")),
person("Michał", "Krassowski", role = "ctb", comment = c(ORCID = "0000-0002-9638-7785"))
person("Michał", "Krassowski", role = "ctb", comment = c(ORCID = "0000-0002-9638-7785")),
person("Michael", "Chirico", role = "ctb")
)
Title: Automatically Position Non-Overlapping Text Labels with 'ggplot2'
Title: Automatically Position Non-Overlapping Text Labels in plots
Description: Provides text and label geoms for 'ggplot2' that help to avoid
overlapping text labels. Labels repel away from each other and away from the
data points.
data points. Workhorse functions are exposed to accomplish the same in base
graphics.
Remotes:
github::tidyverse/ggplot2
Depends:
Expand Down
126 changes: 126 additions & 0 deletions R/base-repel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
get_boxes = function(
x, y, labels, adj, pos, offset, vfont, cex, font
) {
# par('ps') appears to be handled by strwidth
delx = strwidth(labels, cex = cex, font = font, vfont = vfont)
dely = strheight(labels, cex = cex, font = font, vfont = vfont)

# not completely satisfactory, but should suffice:
# https://stackoverflow.com/questions/61651361
cbind(
x1 = x - adj[1L]*delx, y1 = y - adj[2L]*dely,
x2 = x + (1-adj[1L])*delx, y2 = y + (1-adj[2L])*dely
)
}

#' text_repel
#'
#' This function provides an extension to \code{\link[graphics]{text}} that
#' strives to prevent overlapping text labels in your plot. Arguments are
#' aligned to \code{text} to the extent possible.
#'
#' @param x,y,labels,adj,pos,offset,vfont,cex,col,font These parameters
#' are all defined as in \code{\link[graphics]{text}}; see that page for more,
#' and see Details below for some further clarifications / caveats.
#' @param point.padding,force,force_pull,max.time,max.iter,max.overlaps,direction
#' These parameters are all defined as in \code{\link{geom_text_repel}}.
#' @param ... Additional parameters to be passed on to \code{\link[graphics]{text}}.
#'
#' @details A crucial step for the repellence algorithm is to identify a
#' bounding box for each of the \code{labels}. This is difficult to do
#' robustly/portably; best approximations are taken where appropriate.
#'
#' Specifically, \code{\link[graphics]{strwidth}} and \code{strheight} are
#' used to determine the width as plotted of \code{labels}. This appears to
#' work well for the default \code{adj}, less perfectly for \code{adj[1L]}
#' close to \code{0} or \code{1}.
#'
#' \code{pos} is mapped to a value of \code{adj} as follows:
#' * \code{pos=1L} becomes \code{adj=c(.5, .5)}
#' * \code{pos=2L} becomes \code{adj=c(1, 0)}
#' * \code{pos=3L} becomes \code{adj=c(.5, 0)}
#' * \code{pos=4L} becomes \code{adj=c(0, 0)}
#'
#' This is slightly different from \code{text()}, which further
#' accounts for the \code{yCharOffset} device attribute (this is not exposed
#' to the R API); see the \dQuote{R Internals Manual} and the source code
#' for \code{text} in \file{src/library/graphics/src/plot.c} of the R source.
#'
#' For \code{offset}, "character width" is simply mapped to
#' \code{\link[graphics]{par}('cxy')}.
#'
#' @export
text_repel = function(
x, y = NULL, labels, adj = NULL, pos = NULL, offset = .5,
vfont = NULL, cex = 1, col = NULL, font = NULL,
point.padding = 0, force = 1, force_pull = 1,
max.time = .5, max.iter = 10000L, max.overlaps = 10L,
direction = c('both', 'y', 'x'), ...
) {
if ('srt' %in% names(list(...))) stop("'srt' not yet supported")
# like in text.default
if (!missing(y) && (is.character(y) || is.expression(y))) {
labels = y
y = NULL
}
xy = xy.coords(x, y, recycle = TRUE, setLab = FALSE)
# duplicate so we can overwrite & still retain info to draw segments()
xx = xy$x
yy = xy$y

direction = match.arg(direction)

# from src/library/graphics/plot.c:C_text, setting pos is a shortcut
# for setting adj: 1->[.5, .5]; 2->[1, 0]; 3->[.5, 0]; 4->[0, 0];
# application of offset is also as there
# [I'm ignoring 'dd->dev->yCharOffset' which AFAICT is not exposed to
# R outside C, and which is described as "mysterious" in R-ints manual]
if (is.null(pos)) {
# will grow adj if adj is length-1
if (is.null(adj)) adj = c(par('adj'), NA_real_)
if (length(adj) == 1L) adj[2L] = .5
if (anyNA(adj)) adj[is.na(adj)] = .5
} else {
pos = as.integer(pos)
offset = offset * par('cxy')
if (length(pos) > 1L) stop("'pos' must have length one")
if (pos == 1L) {
yy = y - offset
adj = c(.5, .5)
} else if (pos == 2L) {
xx = x - offset
adj = c(1, 0)
} else if (pos == 3L) {
yy = y + offset
adj = c(.5, 0)
} else if (pos == 4L) {
xx = x + offset
adj = c(0, 0)
} else stop("Invalid value for 'pos' [",pos,"]; valid values are 1,2,3,4")
}

lims = par('usr')

repel = repel_boxes2(
data_points = cbind(xx, yy),
point_size = strheight('m', cex = cex, font = font, vfont = vfont),
point_padding_x = point.padding,
point_padding_y = point.padding,
boxes = get_boxes(xx, yy, labels, adj, pos, offset, vfont, cex, font),
xlim = lims[1:2],
ylim = lims[3:4],
hjust = adj[1L],
vjust = adj[2L],
force_push = force * 1e-6,
force_pull = force_pull * 1e-2,
max_time = max.time,
max_iter = max.iter,
max_overlaps = max.overlaps,
direction = direction
)

text(repel$x, repel$y, labels, adj = adj, pos = pos, offset = offset,
vfont = vfont, cex = cex, col = col, font = font, ...)

segments(x, y, repel$x, repel$y)
}
Binary file added test-base/figures/adj.pdf
Binary file not shown.
Binary file added test-base/figures/cex.pdf
Binary file not shown.
Binary file added test-base/figures/cex_vector.pdf
Binary file not shown.
Binary file added test-base/figures/defaults.pdf
Binary file not shown.
Binary file added test-base/figures/font.pdf
Binary file not shown.
Binary file added test-base/figures/font_vector.pdf
Binary file not shown.
Binary file added test-base/figures/force.pdf
Binary file not shown.
Binary file added test-base/figures/offset.pdf
Binary file not shown.
Binary file added test-base/figures/point_padding.pdf
Binary file not shown.
Binary file added test-base/figures/pos.pdf
Binary file not shown.
Binary file added test-base/figures/ps.pdf
Binary file not shown.
Binary file added test-base/figures/vfont.pdf
Binary file not shown.
170 changes: 170 additions & 0 deletions test-base/test-base-plots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
library(ggrepel)
source('R/RcppExports.R')
source('R/base-repel.R')

old_wd = setwd('test-base')

fig_dir = 'figures'
dir.create(fig_dir, showWarnings = FALSE)
open_pdf = function(filename, dim) {
pdf(file.path(fig_dir, paste0(filename, '.pdf')),
width = 7*dim[2L], height = 7*dim[1L])
par(mfrow = dim, mar = c(0, 0, 0, 0), ps = 14)
}

x = mtcars$wt
y = mtcars$mpg
labels = rownames(mtcars)

# ---- DEFAULTS ----
open_pdf('defaults', 1:2)
plot(x, y, main = 'text()', axes = FALSE, xlab = '', ylab = '')
text(x, y, labels)

plot(x, y, main = 'repel_text()', axes = FALSE, xlab = '', ylab = '')
text_repel(x, y, labels)
dev.off()

# ---- ADJ ----
open_pdf('adj', c(3L, 3L))
for (xadj in c(0, .5, 1)) {
for (yadj in c(0, .5, 1)) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('xadj=%.1f | yadj=%.1f', xadj, yadj))
text(x, y, labels, adj = c(xadj, yadj), col = 'gray')
text_repel(x, y, labels, adj = c(xadj, yadj))
}
}
dev.off()

# ---- POS ----
open_pdf('pos', c(2L, 2L))
for (pos in 1:4) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('pos=%d', pos))
text(x, y, labels, pos = pos, col = 'gray')
text_repel(x, y, labels, pos = pos)
}
dev.off()

# ---- POS WITH OFFSET ----
open_pdf('offset', 3:4)
for (offset in c(0, .5, 1)) {
for (pos in 1:4) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('pos=%d | offset=%.1f', pos, offset))
text(x, y, labels, pos = pos, offset = offset, col = 'gray')
text_repel(x, y, labels, pos = pos, offset = offset)
}
}
dev.off()

# --- VFONT ---
open_pdf('vfont', c(3L, 3L))
for (ii in sample(nrow(Hershey$allowed), 9L)) {
vfont = c(
Hershey$typeface[Hershey$allowed[ii, 1L]],
Hershey$fontindex[Hershey$allowed[ii, 2L]]
)
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('vfont=[%s,%s]', vfont[1L], vfont[2L]))
text(x, y, labels, vfont = vfont, col = 'gray')
text_repel(x, y, labels, vfont = vfont)
}
dev.off()

# ---- CEX ----
open_pdf('cex', c(3L, 3L))
for (cex in c(.5, .6, .8, .9, 1, 1.1, 1.25, 1.33, 2, 5)) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('cex=%.2f', cex))
text(x, y, labels, cex = cex, col = 'gray')
text_repel(x, y, labels, cex = cex)
}
dev.off()

# ---- CEX AS VECTOR ----
open_pdf('cex_vector', c(1L, 1L))
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
cex = runif(length(x), .5, 1.5)
text(x, y, labels, cex = cex, col = 'gray')
text_repel(x, y, labels, cex = cex)
dev.off()

# ---- FONT ----
## per src/library/graphics/src/plot.c:FixupFont, font
## can be 1,2,3,4,5 on Unix, up to 32 on Windows
open_pdf('font', c(2L, 2L))
for (font in 1:4) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('font=%d', font))
text(x, y, labels, font = font, col = 'gray')
text_repel(x, y, labels, font = font)
}
dev.off()

# ---- FONT AS VECTOR ----
open_pdf('font_vector', c(1L, 1L))
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
font = sample(1:4, length(x), replace = TRUE)
text(x, y, labels, font = font, col = 'gray')
text_repel(x, y, labels, font = font)
dev.off()

# ---- PAR(PS) ----
open_pdf('ps', c(2L, 2L))
old = par('ps')
for (ps in c(8, 12, 18, 24)) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('ps=%d', ps))
par(ps = ps)
text(x, y, labels, col = 'gray')
text_repel(x, y, labels)
}
par(ps = old)
dev.off()

# ---- POINT.PADDING ----
open_pdf('point_padding', c(2L, 2L))
for (padding in c(0, .5, 1, 2)) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('point.padding=%.1f', padding))
text(x, y, labels, col = 'gray')
text_repel(x, y, labels, point.padding = padding)
}
dev.off()

# ---- FORCE, FORCE_PULL----
open_pdf('force', c(3L, 3L))
for (force in c(.5, 1, 5)) {
for (force_pull in c(.5, 1, 5)) {
plot(x, y, axes = FALSE, main = '', xlab = '', ylab = '')
box()
mtext(side = 3L, line = -2, sprintf('force=%.1f | force_pull=%.1f', force, force_pull))
text(x, y, labels, col = 'gray')
text_repel(x, y, labels, force = force, force_pull = force_pull)
}
}
dev.off()

# ---- MAX.TIME ----

# ---- MAX.ITER ----

# ---- MAX.OVERLAPS ----

# ---- DIRECTION ----

# ---- ERRORS ----

setwd(old_wd)