# R/geometry_helpers.R

##---------------------------------------------------------------------------##
##                                                                           ##
##  geom_textpath main                                                       ##
##                                                                           ##
##  Copyright (C) 2021 by Allan Cameron & Teun van den Brand                 ##
##                                                                           ##
##  Licensed under the MIT license - see https://mit-license.org             ##
##  or the LICENSE file in the project root directory                        ##
##                                                                           ##
##---------------------------------------------------------------------------##

# Helpers -----------------------------------------------------------------

## Adding path data -------------------------------------------------------

#' Supplement path data
#'
#' This function supplements a single path given as x and y coordinates with
#' information about the shape of the curve.
#'
#' @param .data A `data.frame` with `x` and `y` numeric columns. At least two
#'   rows are needed to define a segment. If a numeric `vjust` column is
#'   present it is used to adjust the effective path length; otherwise a
#'   `vjust` of 0.5 is assumed.
#'
#' @return The input `data.frame` with additional columns `angle` (in
#'   degrees), `length` (cumulative distance along the path) and `adj_length`
#'   (cumulative distance after the curvature / `vjust` correction).
#' @noRd
#'
#' @details This function does the work of calculating the gradient of the path
#'   at each `x, y` value along its length, and the angle this implies that text
#'   should sit on the path (measured in degrees, not radians). It takes a
#'   group-subset of the layer data frame as input, so this function needs to
#'   be `lapply()`-ed to the list formed by splitting the layer data frame by
#'   group. This has to be done *after* transforming the data to co-ordinate
#'   space with `coord$transform()`, otherwise the angles will be wrong.
#'
#'   Paths with exactly three points yield a single change in angle, which
#'   cannot be interpolated with `approx()`; that single value is used for
#'   both segments instead.
#'
#' @examples
#' xy <- data.frame(
#'   x =  1:10,
#'   y = (1:10)^2
#' )
#'
#' .add_path_data(xy)
.add_path_data <- function(.data)
{
  dx <- diff(.data$x)
  dy <- diff(.data$y)
  n_seg <- length(dx)

  # Gradient is found and converted to angle here. Since angles are
  # accumulated with cumsum(), sudden transitions where the angle "wraps
  # around" are removed first: any step larger than a quarter turn is shifted
  # by pi so that consecutive angles never jump.
  # NOTE(review): atan() only resolves the angle modulo pi, so the travel
  # direction of the first segment is not encoded in `angle` -- confirm that
  # consumers of this column do not rely on it.
  rads <- atan(dy / dx)
  if (n_seg > 1) {
    diff_rads <- diff(rads)
    diff_rads <- ifelse(diff_rads < - pi / 2, diff_rads + pi, diff_rads)
    diff_rads <- ifelse(diff_rads > + pi / 2, diff_rads - pi, diff_rads)
    rads <- cumsum(c(rads[1], 0, diff_rads))
  } else {
    # A single segment has no change in angle
    diff_rads <- c(0, 0)
    rads <- rep(rads, 2)
  }

  # Now we can safely convert to degrees
  .data$angle <- rads * 180 / pi

  # Letters need to be spaced according to their distance along the path, so
  # we need a column to measure the distance of each point along the path
  dist <- sqrt(dx^2 + dy^2)
  .data$length <- c(0, cumsum(dist))

  # Curvature is how much the angle changes per unit distance (in radians).
  # It is needed to increase or decrease the spacing between characters when
  # vjust is used, otherwise the spacing will be inconsistent across sections
  # with different curvature. The angle changes are resampled to one value
  # per segment.
  if (length(diff_rads) < 2) {
    # approx() needs at least two points; with three path points there is
    # only one angle change, so it applies to both segments.
    diff_rads <- rep(diff_rads, length.out = n_seg)
  } else {
    diff_rads <- approx(seq_along(diff_rads), diff_rads,
                        seq(1, length(diff_rads), length.out = n_seg))$y
  }

  curvature <- diff_rads / dist

  # Set default vjust if absent from data
  adj_vjust <- if (is.null(.data$vjust)) c(0.5, 0.5) else .data$vjust
  # Per-segment vjust (mean of both end points), centred on the path
  adj_vjust <- (head(adj_vjust, -1) + tail(adj_vjust, -1)) / 2 - 0.5
  effective_length <- dist * (1 + adj_vjust * curvature / 5)

  .data$adj_length <- c(0, cumsum(effective_length))

  .data
}

## Getting path points ----------------------------------------------------

#' Calculate offset paths
#'
#' Rebuilds a path from its segment angles and lengths, once for every offset
#' distance in `d`, shifting the start point perpendicular to the first
#' segment and rescaling segment lengths by the local curvature.
#'
#' @param x,y Numeric vectors with the path coordinates.
#' @param d A numeric vector of offset distances. An offset of 0 reproduces
#'   the input path.
#' @param debug A `logical(1)`. If `TRUE`, the input and offset paths are
#'   drawn with base graphics.
#'
#' @return A `list` with the matrices `x`, `y` and `length`. Each has one row
#'   per input point and one column per element of `d`; `length` holds the
#'   non-decreasing cumulative length along every offset path.
#' @noRd
calc_offset <- function(x, y, d = 0, debug = FALSE) {
  n  <- length(x)
  dx <- diff(x)
  dy <- diff(y)

  # atan2() keeps the direction of travel; atan(dy / dx) would fold leftward
  # segments onto rightward ones and mirror the rebuilt path.
  ang <- atan2(dy, dx)
  if (length(ang) > 1) {
    # Unwrap so that consecutive angles never jump across +/- pi
    dang <- diff(ang)
    dang <- ifelse(dang < -pi, dang + 2 * pi, dang)
    dang <- ifelse(dang > +pi, dang - 2 * pi, dang)
    ang  <- cumsum(c(ang[1], 0, dang))
  } else {
    # A single segment has no change in angle
    dang <- c(0, 0)
    ang  <- rep(ang, 2)
  }

  # Start points are shifted along the normal of the first segment
  xstart <- x[1] + cos(ang[1] + pi / 2) * d
  ystart <- y[1] + sin(ang[1] + pi / 2) * d

  lens <- sqrt(dx^2 + dy^2)

  # Resample the angle changes to one value per segment
  if (length(dang) < 2) {
    # approx() needs at least two points; a three-point path has only one
    # angle change, which then applies to both segments.
    dang <- rep(dang, length.out = n - 1)
  } else {
    dang <- approx(seq_along(dang), dang,
                   seq(1, length(dang), length.out = n - 1))$y
  }

  # NOTE(review): the factor `pi` was present in the original as an empirical
  # scaling of the curvature -- confirm its derivation.
  curv <- dang / lens * pi
  # Zero-length segments (duplicated points) have no defined curvature and
  # contribute no length; avoid NaN/Inf leaking into the cumulative sums.
  curv[lens == 0] <- 0
  curv <- 1 - outer(curv, d)

  # One column per offset; the leading 0 keeps the start point in place
  eff_len <- rbind(0, curv * lens)
  xout <- apply(cos(ang) * eff_len, 2, cumsum) + rep(xstart, each = n)
  yout <- apply(sin(ang) * eff_len, 2, cumsum) + rep(ystart, each = n)
  eff_len <- apply(eff_len, 2, cumsum)
  eff_len <- apply(eff_len, 2, cummax) # Should only ever increase

  if (debug) {
    plot(x, y, type = 'b',
         ylim = range(c(y, yout)), xlim = range(c(x, xout)))
    for (i in seq_len(ncol(xout))) {
      lines(xout[, i], yout[, i], col = i + 1, type = 'b', pch = 16)
    }
  }

  list(
    x = xout,
    y = yout,
    length = eff_len
  )
}

#' Interpolate path at text locations
#'
#' Places the individual glyphs of a single label along a single path.
#'
#' @param path A `data.frame` with the numeric columns `x`, `y` and
#'   `adj_length`. Any further columns are carried over to the output.
#' @param label A `character(1)` with the string to place.
#' @param gp A `grid::gpar()` object, passed on to `measure_text()`.
#' @param hjust A `numeric(1)` giving the position of the string along the
#'   path: 0 starts the string at the beginning of the path, 1 ends it at the
#'   end of the path.
#' @param vjust A `numeric(1)` passed to `measure_text()`; the resulting
#'   vertical glyph offsets are used as offset distances from the path.
#' @param halign A `numeric(1)` passed to `measure_text()` as its `hjust`,
#'   i.e. the horizontal justification of the text box around its origin.
#'
#' @return The result of `list_to_df()` with one row per glyph, holding the
#'   interpolated columns of `path` plus `angle` (degrees), `x`, `y` and
#'   `label`. Glyphs with an `NA` angle are dropped.
#' @noRd
.get_path_points <- function(
  path,
  label = "placeholder",
  gp = get.gpar(),
  hjust = 0.5, vjust = 0.5,
  halign = 0.5
) {
  ppi <- 72

  # Measure text
  glyphs <- measure_text(label, gp = gp, ppi = ppi,
                         hjust = halign[1], vjust = vjust[1])
  string_size <- attr(glyphs, "metrics")$width
  # Distinct vertical offsets; 0 (the path itself) always comes first
  y_pos <- unique(c(0, glyphs$ymin))

  # One offset path per vertical offset
  offset  <- calc_offset(path$x, path$y, d = y_pos)
  n       <- nrow(path)
  arc_len <- offset$length

  # Calculate anchorpoint. The glyph positions are measured relative to a
  # text box justified by `halign`, so the box origin sits `halign * width`
  # from the left edge of the string; `hjust` distributes the remaining free
  # path length.
  anchor <- hjust[1] * (arc_len[n, 1] - string_size) + halign[1] * string_size
  i  <- findInterval(anchor, arc_len[, 1], all.inside = TRUE)
  di <- (anchor - arc_len[i, 1]) / (arc_len[i + 1, 1] - arc_len[i, 1])
  # Translate the anchor on the path to the matching length on every offset
  anchor <- arc_len[i, ] * (1 - di) + arc_len[i + 1, ] * di

  # Offset text x by anchorpoint
  xpos <- c("xmin", "xmid", "xmax")
  glyphs$yid <- match(glyphs$ymin, y_pos)
  glyphs[, xpos] <- glyphs[, xpos] + anchor[glyphs$yid]

  # Project text on path: find the segment of the relevant offset path that
  # every glyph start / mid / end falls in.
  index <- x <- unlist(glyphs[, xpos], FALSE, FALSE)
  membr <- rep(glyphs$yid, 3)
  # `split<-` assigns groups in sorted order, so the offset columns must be
  # supplied in sorted order too.
  used <- sort(unique(membr))
  split(index, membr) <- Map(
    findInterval,
    x   = split(index, membr),
    vec = asplit(arc_len[, used, drop = FALSE], MARGIN = 2),
    all.inside = TRUE
  )
  i0 <- cbind(index + 0, membr)
  i1 <- cbind(index + 1, membr)
  di <- (x - arc_len[i0]) / (arc_len[i1] - arc_len[i0])
  new_x <- offset$x[i0] * (1 - di) + offset$x[i1] * di
  new_y <- offset$y[i0] * (1 - di) + offset$y[i1] * di
  # Columns are glyph start, midpoint and end
  dim(new_x) <- dim(new_y) <- c(nrow(glyphs), 3)

  # Calculate text angles from glyph start to glyph end
  dx <- new_x[, 3] - new_x[, 1]
  dy <- new_y[, 3] - new_y[, 1]
  ang <- atan2(dy, dx) * 180 / pi

  # Format output: numeric columns are interpolated at the glyph midpoints,
  # other columns take their first value.
  df <- as.list(path[setdiff(names(path), c("x", "y", "angle"))])
  is_num <- vapply(df, is.numeric, logical(1))
  df[is_num] <- lapply(df[is_num], function(i) {
    approx(x = path$adj_length, y = i, xout = glyphs$xmid, ties = mean)$y
  })
  df[!is_num] <- lapply(lapply(df[!is_num], `[`, 1L),
                        rep, length.out = nrow(glyphs))

  df$angle <- ang
  df$x <- new_x[, 2]
  df$y <- new_y[, 2]
  df$label <- glyphs$glyph
  df <- list_to_df(df)
  df[!is.na(df$angle), ]
}

#' #' Interpolate path at text locations
#' #'
#' #' This function aids in specifying the `x`, `y` and angle components of where
#' #' individual letters should be placed, of a single path-label pair.
#' #'
#' #' @param path A `data.frame` with the numeric columns `x`, `y`, `angle`,
#' #'   `length` and `adj_length`.
#' #' @param label A `character(1)` scalar with a string to place.
#' #' @param gp An object of class `"gpar"`, typically the output from a call to
#' #'   the `grid::gpar()` function. Note that parameters related to fonts *must*
#' #'   be present. To be exact, the following parameters cannot be missing:
#' #'   `fontfamily`, `font`, `fontsize` and `lineheight`.
#' #' @param hjust A `numeric(1)` scalar specifying horizontal justification along
#' #'   the path.
#' #'
#' #' @return A `data.frame` with numerical values interpolated at the points where
#' #'   the letters in `label` argument should be placed, along with a `label`
#' #'   column containing individual glyphs of the string.
#' #' @noRd
#' #'
#' #' @details This is another helper function for the draw_panel function.
#' #' This is where
#' #' the text gets split into its component parts and assigned x, y and angle
#' #' components. This function also takes one group subset of the main panel data
#' #' frame at a time after .add_path_data() has been called, and returns a
#' #' modified data frame.
#' #'
#' #' The hjust is also applied here. Actually, although it's called hjust, this
#' #' parameter is really just analogous to hjust, and never gets passed to grid.
#' #' It determines how far along the path the string will be placed. The
#' #' individual letters all have an hjust of 0.5.
#' #'
#' #' @examples
#' #' xy <- data.frame(
#' #'   x =  1:10,
#' #'   y = (1:10)^2
#' #' )
#' #'
#' #' xy <- .add_path_data(xy)
#' #'
#' #' .get_path_points(xy)
#' .get_path_points <- function(path, label = "placeholder",
#'                              gp = get.gpar(), hjust = 0.5)
#' {
#'   # Get pixels per inch (72 is default screen resolution). For some reason text
#'   # renders weirdly if this is adapted to the device. For raster graphics,
#'   # one would typically use the following:
#'   # ppi <- dev.size("px")[1] / dev.size("in")[1]
#'   # But that gives the wrong spacing here.
#'   ppi <- 72
#'
#'   # Using the shape_string function from package "systemfonts" allows fast
#'   # and accurate calculation of letter spacing
#'
#'   letters <- measure_text(label, gp = gp, ppi = ppi, vjust = 0.5,
#'                           hjust = hjust[1], path_len = max(path$adj_length))
#'
#'   # We now need to interpolate all the numeric values along the path so we
#'   # get the appropriate values at each point. Non-numeric values should all
#'   # be identical, so these are just kept as-is
#'
#'   df <- as.list(path[setdiff(names(path), c("x", "y", "angle"))])
#'   is_num <- vapply(df, is.numeric, logical(1))
#'   df[is_num] <- lapply(df[is_num], function(i) {
#'     approx(x = path$adj_length, y = i, xout = letters$xmid, ties = mean)$y
#'   })
#'   df[!is_num] <- lapply(lapply(df[!is_num], `[`, 1L),
#'                         rep, length.out = nrow(letters))
#'
#'   # Instead of interpolating the angle from what we've calculated earlier and
#'   # what should  apply to the letter mid-points, we are re-calculating the angle
#'   # from the letter start and end points to get better angles for coarse paths
#'
#'   # Interpolate x coordinates
#'   f    <- approxfun(x = path$adj_length, y = path$x)
#'   dx   <- f(letters$xmax) - f(letters$xmin)
#'   df$x <- f(letters$xmid)
#'
#'   # Interpolate y coordinates
#'   f    <- approxfun(x = path$adj_length, y = path$y)
#'   dy   <- f(letters$xmax) - f(letters$xmin)
#'   df$y <- f(letters$xmid)
#'
#'   # Recalculate angle
#'   df$angle <- atan2(dy, dx) * 180 / pi
#'
#'   # Now we assign each letter to its correct point on the path
#'   df$label <- letters$glyph
#'
#'   # This ensures that we don't try to return any invalid letters
#'   # (those letters that fall off the path on either side will have
#'   # NA angles)
#'   df <- list_to_df(df)
#'   df[!is.na(df$angle), ]
#' }

#' Wrapper for text measurement
#'
#' This wraps the `systemfonts::shape_string()` function to return positions
#' for every glyph of a label.
#'
#' @param label A `character`; only the first element is measured.
#' @param gp A `grid::gpar()` object. Its `fontfamily`, `font`, `fontsize`,
#'   `lineheight` and (optionally) `tracking` entries are used.
#' @param ppi A `numeric(1)` for the resolution in points per inch. All
#'   returned sizes are divided by this value.
#' @param vjust,hjust The justification of the text, passed on to
#'   `shape_string()`. A `vjust` of exactly 1 is nudged upwards by
#'   `.Machine$double.eps` before it is passed on.
#'
#' @return A `data.frame` with the columns `glyph`, `ymin`, `xmin`, `xmid` and
#'   `xmax`, and a `"metrics"` attribute holding the string metrics with the
#'   `width` and `height` rescaled by `ppi`.
#' @noRd
#'
#' @examples
#' measure_text("Hello there,\nGeneral Kenobi")
measure_text <- function(label, gp = get.gpar(), ppi = 72,
                         vjust = 0.5, hjust = 0.5) {
  vjust <- replace(vjust, vjust == 1, 1 + .Machine$double.eps)

  # `font` encodes the face: 2 = bold, 3 = italic, 4 = bold italic
  face <- gp$font[1]
  shaped <- shape_string(
    strings    = label[1],
    family     = gp$fontfamily[1],
    italic     = face %in% c(3, 4),
    bold       = face %in% c(2, 4),
    size       = gp$fontsize[1],
    lineheight = gp$lineheight[1],
    tracking   = gp$tracking[1] %||% 0,
    res = ppi,
    vjust = vjust,
    hjust = hjust
  )

  # Rescale the string metrics
  string_metrics <- shaped$metrics
  string_metrics$width  <- string_metrics$width  / ppi
  string_metrics$height <- string_metrics$height / ppi

  # Rescale the glyph positions: left edge and half-width of each glyph
  glyph_shape <- shaped$shape
  left_edge   <- glyph_shape$x_offset   / ppi
  half_width  <- glyph_shape$x_midpoint / ppi

  # Assemble the per-glyph output
  out <- data_frame(
    glyph = glyph_shape$glyph,
    ymin  = glyph_shape$y_offset / ppi,
    xmin  = left_edge,
    xmid  = left_edge + half_width,
    xmax  = left_edge + half_width * 2
  )
  attr(out, "metrics") <- string_metrics
  out
}

## Getting surrounding lines -----------------------------------------------

## TODO: Do we want to add a parameter to switch the lines on and off,
##       inside geom_textpath(), or simply set a default linewidth of 0?
## RE: We could separate it into two geoms, one with a path by default and one
##     without. I think some graphics devices interpret 0-linewidth differently,
##     so the safer option would be to use `linetype = 0`, I think.

## TODO: Below, we're using `vjust` to determine where to cut the path if it
##       intersects text, but that doesn't take ascenders and descenders into
##       account.

## TODO: Sometimes when the device is really small or the letters huge, there
##       can be a letters data.frame that has 0 rows for a group. We should
##       defensively code something against this.

#' Trim text area from path
#'
#' This function splits a path when a string is predicted to intersect with
#' the path.
#'
#' @param path A `data.frame` with numeric `x`, `y` and `length` columns, an
#'   integer `id` column and the `group_min_vjust` / `group_max_vjust`
#'   columns. The `id` column must match that in the `letters` argument and
#'   is used as an index, so ids are expected to run from 1 upwards.
#' @param letters A `data.frame` with at least a numeric `length` column and
#'   integer `id` column. The `id` column must match that in the `path`
#'   argument. Paths without any letters are left untrimmed.
#' @param cut_path A single logical TRUE or FALSE which if TRUE breaks the path
#'   into two sections, one on either side of the string and if FALSE leaves the
#'   path unbroken. The default value is NA, which will break the line if the
#'   vjust range of the group overlaps `vjust_lim`.
#' @param breathing_room A `numeric(1)` with the extra path length removed on
#'   either side of the string.
#' @param vjust_lim A `numeric` of length two setting the lower and upper limits
#'   of the vjust range of the `path` argument, which is used to decide
#'   whether a path should be trimmed or not when `cut_path = NA`.
#'
#' @details We probably want the option to draw the path itself, since this will
#'   be less work for the end-user. If the `vjust` is between 0 and 1 then the
#'   path will clash with the text, so we want to remove the segment where the
#'   text is. This function will get the correct segments in either case,
#'   but it needs the whole path data *and* the calculated string data to do it.
#'
#' @return If nothing is trimmed, the `path` data.frame with `trim`,
#'   `section` (always "all"), `new_id` and `start` columns added. Otherwise a
#'   data.frame with the columns `x`, `y`, `id`, `section`, `new_id` and
#'   `start`, where `section` indicates whether a point was not clipped
#'   ("all"), or lies before ("pre") or after ("post") the string.
#' @noRd
#'
#' @examples
#' xy <- data.frame(
#'   x =  1:10,
#'   y = (1:10)^2,
#'   id = 1
#' )
#'
#' xy <- .add_path_data(xy)
#' glyphs <- .get_path_points(xy)
#' .get_surrounding_lines(xy, glyphs)
.get_surrounding_lines <- function(path, letters, cut_path = NA,
                                   breathing_room = 0.15,
                                   vjust_lim = c(0, 1)) {

  # A path clashes with its text when the vjust range of the group overlaps
  # the vjust limits. (The second clause is implied by the first whenever
  # max >= min; it is kept so the result is unchanged for any input.)
  path$trim <- (path$group_max_vjust >= vjust_lim[1] &
                path$group_min_vjust <= vjust_lim[2] ) |
               (path$group_max_vjust <= vjust_lim[2] &
                path$group_min_vjust >= vjust_lim[1])

  # An explicit cut_path overrides the vjust-based decision
  if (!is.na(cut_path)) {
    path$trim <- rep(cut_path, nrow(path))
  }

  # Simplify if text isn't exactly on path
  if (!any(path$trim)) {
    path$section <- "all"
  } else {
    # One flag per path, taken from the first row of every id
    trim <- path$trim[c(TRUE, path$id[-1] != path$id[-nrow(path)])]

    path_max <- vapply(split(path$length, path$id), max,
                       numeric(1), USE.NAMES = FALSE)
    n_paths <- length(path_max)

    # Get locations where strings start and end
    letter_lens <- run_len(letters$id)
    ends    <- cumsum(letter_lens)
    starts  <- ends - letter_lens + 1
    text_id <- letters$id[starts]

    # Index by id rather than by position, so that a path without any
    # letters cannot shift the values of the paths that follow it.
    mins <- numeric(n_paths)
    maxs <- numeric(n_paths)
    mins[text_id] <- letters$length[starts]
    maxs[text_id] <- letters$length[ends]

    # Nothing to cut out of a path that carries no letters
    trim <- trim & (seq_len(n_paths) %in% text_id)

    # Create breathing space around letters
    mins <- pmax(0, mins - breathing_room)
    maxs <- pmin(path_max, maxs + breathing_room)

    # Consider path length as following one another to avoid a loop; a gap
    # of 1 between consecutive paths keeps their lengths from touching.
    sumlen <- c(0, path_max[-length(path_max)])
    sumlen <- cumsum(sumlen + seq_along(path_max) - 1)
    mins <- mins + sumlen
    maxs <- maxs + sumlen
    path$length <- path$length + sumlen[path$id]

    # Assign sections based on trimming; points under the string keep ""
    section <- character(nrow(path))
    section[path$length <= mins[path$id]] <- "pre"
    section[path$length >= maxs[path$id]] <- "post"
    section[!trim[path$id]] <- "all"

    # Interpolate trimming points
    ipol <- c(mins[trim], maxs[trim])
    if (length(ipol) > 0) {
      trim_x <- approx(path$length, path$x, ipol)$y
      trim_y <- approx(path$length, path$y, ipol)$y
    } else {
      trim_x <- numeric(0)
      trim_y <- numeric(0)
    }

    # Add trimming points to paths, sorted into place by length
    path <- data_frame(
      x  = c(path$x, trim_x),
      y  = c(path$y, trim_y),
      id = c(path$id, rep(which(trim), 2L)),
      section = c(section, rep(c("pre", "post"), each = sum(trim)))
    )[order(c(path$length, ipol)), , drop = FALSE]

    # Filter empty sections (i.e., the part where the string is)
    path <- path[path$section != "", , drop = FALSE]
  }

  if (nrow(path) > 0) {
    # Get first point of individual paths
    new_id <- paste0(path$id, "&", path$section)
    new_id <- discretise(new_id)
    start  <- c(TRUE, new_id[-1] != new_id[-length(new_id)])

    path$new_id <- new_id
    path$start  <- start
  } else {
    path$new_id <- integer(0)
    path$start  <- logical(0)
  }

  path
}


## Split linebreaks  -----------------------------------------------

#' Split strings with linebreaks into different groups
#'
#' Prepares the data for plotting by breaking every label at its line breaks
#' and assigning each resulting line of text to a group of its own.
#'
#' @param data A `data.frame` with at least a factor or character column
#'   called "label", integer columns called "group" and "linetype", and
#'   numeric columns called "vjust" and "lineheight".
#'
#' @details Every row whose label contains a line break is repeated once per
#'   line of text. The repeated rows get their own group and a `vjust` that is
#'   spread around the originally specified `vjust` in steps of the row's
#'   `lineheight`. Only lines with a `vjust` between 0 and 1 keep the original
#'   linetype; if there is no such line, the line with the `vjust` closest
#'   to 0 keeps it. All other lines get a linetype of 0.
#'
#' @return A data frame with the columns of the input plus `group_min_vjust`
#'   and `group_max_vjust`, in which rows with line breaks come first and
#'   `group` is renumbered as consecutive numbers.
#' @noRd
#'
#' @examples
#' xy <- data.frame(
#'   x =  1:10,
#'   y = (1:10)^2,
#'   group = 1,
#'   label = "This string \n has a line break",
#'   vjust = 0.5,
#'   linetype = 1,
#'   lineheight = 1.2
#' )
#'
#' .groupify_linebreaks(xy)
.groupify_linebreaks <- function(data)
{
    data$label <- as.character(data$label)
    data$group_min_vjust <- data$vjust
    data$group_max_vjust <- data$vjust

    # Separate the rows that need splitting from those that do not
    has_break <- grepl("[\r\n]", data$label)
    multi  <- data[has_break, ]
    single <- data[!has_break, ]
    text_lines <- strsplit(multi$label, "[\r\n]+")

    # Expands the i-th multi-line row into one row per line of text
    expand_row <- function(i) {
      txt <- text_lines[[i]]
      n   <- length(txt)
      out <- multi[rep(i, n), ]
      out$label <- txt

      # Spread the lines around the original vjust
      out$vjust <- (seq(n) - n)  * out$lineheight[1] +
                   out$vjust[1] * out$lineheight[1] * (n - 1) + out$vjust[1]

      # Fractional offsets keep the new groups between the existing ones
      out$group <- rep(out$group[1] + seq(0, 1 - 1/n, 1/n),
                       length.out = nrow(out))

      # Only lines sitting on the path keep their linetype
      original_lty <- out$linetype[1]
      on_path <- which(out$vjust <= 1 & out$vjust >= 0)
      out$linetype <- 0
      out$linetype[on_path] <- original_lty
      if (all(out$linetype == 0)) {
        out$linetype[which.min(abs(out$vjust))] <- original_lty
      }

      out$group_min_vjust <- min(out$vjust)
      out$group_max_vjust <- max(out$vjust)
      out
    }

    expanded <- do.call(rbind, lapply(seq_along(text_lines), expand_row))
    data <- rbind(expanded, single)

    # Renumber the groups as consecutive numbers
    data$group <- as.numeric(factor(data$group))

    data
}