r-lib · jennybc · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025
@@ -5,6 +5,8 @@
   - Previously: `c("condition", "error", "gmail_error")`
   - Now: `c("gmailr_error", "gargle_error_request_failed", "http_error_{XXX}", "gargle_error", "rlang_error", "error", "condition")`
 
+* Text headers, such as `Subject`, are now properly prepared as per RFC 2047, fixing a problem with long-ish headers that contain non-ASCII characters (#193).
+
 ## Deprecations
 
 * Functions that lack the `gm_` prefix have been removed, concluding a deprecation process that kicked off with gmailr 1.0.0 (released 2019-08-23). These functions were hard deprecated in gmailr 2.0.0 (released 2023-06-30). This eliminates many name conflicts with other packages (including the base package).

@@ -205,32 +205,6 @@ gm_attach_file <- function(mime, filename, type = NULL, id = NULL, ...) {
   )
 }
 
-header_encode <- function(x) {
-  x <- enc2utf8(unlist(strsplit(as.character(x), ", ?")))
-
-  # this won't deal with <> used in quotes, but I think it is rare enough that
-  # is ok
-  m <- rematch2::re_match(x, "^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")
-  res <- character(length(x))
-
-  # simple addresses contain no <>, so we don't need to do anything further
-  simple <- !nzchar(m$addr_spec)
-  res[simple] <- m$phrase[simple]
-
-  # complex addresses may need to be base64-encoded
-  needs_encoding <- Encoding(m$phrase) != "unknown"
-  res[needs_encoding] <- sprintf(
-    "=?utf-8?B?%s?=",
-    vcapply(m$phrase[needs_encoding], encode_base64)
-  )
-  res[!needs_encoding] <- m$phrase[!needs_encoding]
-
-  # Add the addr_spec onto non-simple examples
-  res[!simple] <- sprintf("%s <%s>", res[!simple], m$addr_spec[!simple])
-
-  paste0(res, collapse = ", ")
-}
-
 #' Convert a mime object to character representation
 #'
 #' This function converts a mime object into a character vector
@@ -240,8 +214,9 @@ header_encode <- function(x) {
 #' @param ... further arguments ignored
 #' @export
 as.character.mime <- function(x, newline = "\r\n", ...) {
-  # encode headers
-  x$header <- lapply(x$header, header_encode)
+  for (i in seq_along(x$header)) {
+    x$header[[i]] <- encode_header(names(x$header)[i], x$header[[i]])
+  }
 
   # Check if we need nested structure ((text + HTML) + attachments)
   has_both_bodies <- exists_list(x$parts, TEXT_PART) &&
@@ -364,3 +339,123 @@ with_defaults <- function(defaults, ...) {
   missing <- setdiff(names(defaults), names(args))
   c(defaults[missing], args)
 }
+
+# Header encoding helpers ------------------------------------------------------
+#
+# In general, the Gmail API requires following RFC 2822 Internet Message Format
+# https://datatracker.ietf.org/doc/html/rfc2822
+#
+# Then, within that, non-ASCII text in headers is addressed in RFC 2047 MIME
+# Part Three: Message Header Extensions for Non-ASCII Text
+# https://datatracker.ietf.org/doc/html/rfc2047
+#
+# Refactoring the header processing was motivated by
+# https://github.com/r-lib/gmailr/issues/193
+
+# Strategy: Divide headers into address headers vs. everything else.
+#
+# Use existing helper to encode address headers, as it was clearly written for
+# that use case.
+#
+# Use a new helper for other headers, that can deal with "folding" (see the RFC)
+# long-ish, non-ASCII text, e.g. in the Subject.
+
+encode_header <- function(name, value) {
+  address_headers <- c(
+    "To",
+    "From",
+    "Cc",
+    "Bcc",
+    "Reply-To",
+    "Sender",
+    "Resent-To",
+    "Resent-From",
+    "Resent-Cc",
+    "Resent-Bcc",
+    "Resent-Sender"
+  )
+
+  fun <- if (name %in% address_headers) {
+    header_encode_address
+  } else {
+    header_encode_text
+  }
+  fun(value)
+}
+
+# Pre-existing helper now renamed to reflect its motivating use case.
+# - May contain multiple comma-separated addresses
+# - Each address may have the format "Name" <email@example.com>
+# - Only the "Name" part needs encoding, not the email address
+header_encode_address <- function(x) {
+  x <- enc2utf8(unlist(strsplit(as.character(x), ", ?")))
+
+  # this won't deal with <> used in quotes, but I think it is rare enough that
+  # is ok
+  m <- rematch2::re_match(x, "^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")
+  res <- character(length(x))
+
+  # simple addresses contain no <>, so we don't need to do anything further
+  simple <- !nzchar(m$addr_spec)
+  res[simple] <- m$phrase[simple]
+
+  # complex addresses may need to be base64-encoded
+  needs_encoding <- Encoding(m$phrase) != "unknown"
+  res[needs_encoding] <- sprintf(
+    "=?utf-8?B?%s?=",
+    vcapply(m$phrase[needs_encoding], encode_base64)
+  )
+  res[!needs_encoding] <- m$phrase[!needs_encoding]
+
+  # Add the addr_spec onto non-simple examples
+  res[!simple] <- sprintf("%s <%s>", res[!simple], m$addr_spec[!simple])
+
+  paste0(res, collapse = ", ")
+}
+
+# New helper for a generic "text" header
+# - Single value (not comma-separated)
+# - May contain long Unicode text that exceeds RFC 2047's 75-character limit
+# - Must be "folded" into multiple encoded-words if too long
+header_encode_text <- function(x) {
+  if (length(x) == 0 || is.null(x)) {
+    return(x)
+  }
+
+  x <- enc2utf8(as.character(x))
+
+  # Pass pure ASCII through unchanged
+  if (Encoding(x) == "unknown") {
+    return(x)
+  }
+
+  # First, get a single base64-encoded string
+  b64_full <- encode_base64(x, line_length = 0L, newline = "")
+  b64_len <- nchar(b64_full)
+
+  # encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+  # charset is utf-8
+  # encoding is "B" (as opposed to "Q"), as in "BASE64"
+  encode_word <- function(b64) sprintf("=?utf-8?B?%s?=", b64)
+
+  # RFC 2047: "An 'encoded-word' may not be more than 75 characters long,
+  # including 'charset', 'encoding', 'encoded-text', and delimiters."
+  # Format: =?utf-8?B?<encoded-text>?=
+  # The formalities account for 12 characters, which leaves up to 63 characters
+  # for the encoded text. However, base64 works in 4-character groups, so we
+  # must use a multiple of 4: the largest is 60.
+  max_b64_per_word <- 60
+
+  # Return as single encoded-word, if possible
+  if (b64_len <= max_b64_per_word) {
+    return(encode_word(b64_full))
+  }
+
+  # Otherwise, split into multiple encoded-words
+  starts <- seq(1L, b64_len, by = max_b64_per_word)
+  stops <- c(starts[-1] - 1L, b64_len)
+  encoded_words <- encode_word(substring(b64_full, starts, stops))
+
+  # Join multiple encoded-words with CRLF SPACE per RFC 2047
+  paste0(encoded_words, collapse = "\r\n ")
+}
@@ -171,9 +171,9 @@ encode_base64 <- function(x, line_length = 76L, newline = "\r\n") {
   }
 
   if (is.raw(x)) {
-    base64encode(x, 76L, newline)
+    base64encode(x, line_length, newline)
   } else {
-    base64encode(charToRaw(x), 76L, "\r\n")
+    base64encode(charToRaw(x), line_length, newline)
   }
 }
 

@@ -4,39 +4,39 @@ test_that("MIME - Basic functions", {
   expect_true(length(msg$header) > 0)
 
   rv <- gm_to(msg, "adam@ali.as")
-  expect_equal(header_encode(rv$header$To), "adam@ali.as")
+  expect_equal(header_encode_address(rv$header$To), "adam@ali.as")
 
   rv <- gm_from(msg, "bob@ali.as")
-  expect_equal(header_encode(rv$header$From), "bob@ali.as")
+  expect_equal(header_encode_address(rv$header$From), "bob@ali.as")
 
   rv <- gm_to(msg, c("adam@ali.as", "another@ali.as", "bob@ali.as"))
   expect_equal(
-    header_encode(rv$header$To),
+    header_encode_address(rv$header$To),
     "adam@ali.as, another@ali.as, bob@ali.as"
   )
 
   rv <- gm_cc(msg, c("adam@ali.as", "another@ali.as", "bob@ali.as"))
   expect_equal(
-    header_encode(rv$header$Cc),
+    header_encode_address(rv$header$Cc),
     "adam@ali.as, another@ali.as, bob@ali.as"
   )
 
   rv <- gm_bcc(msg, c("adam@ali.as", "another@ali.as", "bob@ali.as"))
   expect_equal(
-    header_encode(rv$header$Bcc),
+    header_encode_address(rv$header$Bcc),
     "adam@ali.as, another@ali.as, bob@ali.as"
   )
 })
 
-test_that("header_encode encodes non-ascii values as base64", {
-  expect_equal(header_encode("f\U00F6\U00F6"), "=?utf-8?B?ZsO2w7Y=?=")
+test_that("header_encode_address encodes non-ascii values as base64", {
+  expect_equal(header_encode_address("f\U00F6\U00F6"), "=?utf-8?B?ZsO2w7Y=?=")
 
   expect_equal(
-    header_encode('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),
+    header_encode_address('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),
     "=?utf-8?B?ImbDtsO2IGIOcjEi?= <baz@qux.com>"
   )
 
-  res <- header_encode(
+  res <- header_encode_address(
     c(
       '"f\U00F6\U00F6 b\U00E1r" <baz@qux.com>',
       '"foo bar" <foo.bar@baz.com>',
@@ -248,3 +248,74 @@ test_that("trailing whitespace", {
     quoted_printable_encode("foo\t \n \t")
   )
 })
+
+test_that("header_encode_text() passes ASCII-only text through", {
+  ascii_subject <- "This is a plain ASCII subject"
+  result <- header_encode_text(ascii_subject)
+  expect_equal(result, ascii_subject)
+
+  long_ascii <- strrep("a", 100)
+  result <- header_encode_text(long_ascii)
+  expect_equal(result, long_ascii)
+})
+
+test_that("header_encode_text() encodes short Unicode text", {
+  # Short subject with Unicode that fits in single encoded-word
+  short_unicode <- "Hello \u00E1\u00E9\u00ED\u00F3\u00FA"
+  result <- header_encode_text(short_unicode)
+
+  # Should not contain CRLF (no folding)
+  expect_no_match(result, "\r\n", fixed = TRUE)
+  # Should be a single encoded-word
+  expect_match(result, "^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")
+  # Should be within RFC 2047 limit
+  expect_lte(nchar(result), 75)
+})
+
+# https://github.com/r-lib/gmailr/issues/193
+test_that("header_encode_text() folds long non-ASCII text", {
+  long_subject <- paste0("\u00E1", strrep("a", 54), "\u00E1")
+  result <- header_encode_text(long_subject)
+
+  # Should contain CRLF SPACE (folded into multiple encoded-words)
+  expect_match(result, "\r\n ", fixed = TRUE)
+
+  # Each line should be an encoded-word within RFC 2047 limit
+  lines <- strsplit(result, "\r\n ", fixed = TRUE)[[1]]
+  expect_gt(length(lines), 1)
+  for (line in lines) {
+    expect_lte(nchar(line), 75)
+    expect_match(line, "^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")
+  }
+})
+
+test_that("header_encode_text() roundtrip: encode then decode", {
+  # this is to make sure we break up the encoded-text in chunks of 4 characters
+  original <- "\U0001F389\U0001F38A\U0001F388 C\u00E9l\u00E9bration extraordinaire \u00E0 Z\u00FCrich! \U0001F973\U0001F382\U0001F37E Join us for a tr\u00E8s sp\u00E9cial soir\u00E9e! \U0001F942\U0001F377\U0001F95C"
+  encoded <- header_encode_text(original)
+
+  encoded_words <- strsplit(encoded, "\r\n ", fixed = TRUE)[[1]]
+  encoded_text <- sub("[?]=$", "", sub("^=[?]utf-8[?]B[?]", "", encoded_words))
+
+  # Decode each chunk separately (to verify each is valid base64), then concatenate
+  decoded <- rawToChar(unlist(lapply(encoded_text, base64decode)))
+  Encoding(decoded) <- "UTF-8"
+  expect_equal(decoded, original)
+})
+
+test_that("gm_subject() uses proper encoding in full MIME message", {
+  # Long subject - should be folded
+  long_subject <- paste0("\u00E1", strrep("a", 100), "\u00E1")
+  msg_long <- gm_mime() |>
+    gm_to("test@example.com") |>
+    gm_subject(long_subject) |>
+    gm_text_body("Body")
+
+  msg_long_chr <- as.character(msg_long)
+
+  # The subject should span multiple lines with proper folding
+  expect_match(
+    msg_long_chr,
+    "Subject: =[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=\r\n =[?]utf-8[?]B[?]"
+  )
+})